Skip to content

Instantly share code, notes, and snippets.

@Codegass
Created July 9, 2022 13:39
Show Gist options
  • Select an option

  • Save Codegass/8ea0b564277b9e733645dca423cc4ef8 to your computer and use it in GitHub Desktop.

Select an option

Save Codegass/8ea0b564277b9e733645dca423cc4ef8 to your computer and use it in GitHub Desktop.
python parser
class McCabe():
    '''
    Count control-flow keywords inside tagged Java test methods.

    For each project, the tagged-file directory is scanned to obtain
    (java file, test case) pairs; every matching ``<java file>.java`` under
    the project's source tree is then scanned line by line, and the number
    of lines containing ``if``/``else``/``for``/``while``/``try``/``catch``/
    ``switch`` inside the test method is recorded in ``self.df``.
    Constructing an instance runs the whole pipeline and writes a CSV.
    '''

    # Keywords counted per test case, in the order they appear in the result
    # tuple of read_test_case and in the columns of self.df.
    KEYWORDS = ('if', 'else', 'for', 'while', 'try', 'catch', 'switch')
    COLUMNS = ('project', 'file', 'test_case') + KEYWORDS

    # Compiled once; each pattern matches the keyword as a whole word.
    _KEYWORD_PATTERNS = tuple(re.compile(r'\b' + kw + r'\b') for kw in KEYWORDS)
    # Lines starting with `//` or `*` (after optional whitespace) are comments.
    _COMMENT_LINE = re.compile(r'\s*(?://|\*)')

    def __init__(self, prefix_path, tagged_file_path, project_list,
                 project_github_name_path, max_projects=4,
                 output_path='./inbox/Complexity.csv'):
        '''
        Run the analysis for the given projects and write the result CSV.

        prefix_path: root directory that contains the checked-out projects.
        tagged_file_path: path prefix that each project name is appended to
            (plain string concatenation) to locate its tagged files.
        project_list: project names, indexable in parallel with
            project_github_name_path.
        project_github_name_path: per-project directory name under
            prefix_path, indexed by project position.
        max_projects: number of leading projects to process. The default 4
            keeps the previously hard-coded limit; pass None to process all.
        output_path: CSV destination; its directory is created if missing.
        '''
        self.prefix_path = prefix_path
        self.tagged_file_path = tagged_file_path
        self.project_list = project_list
        self.project_github_name_path = project_github_name_path
        self.df = pd.DataFrame(columns=list(self.COLUMNS))

        # Slicing (instead of range(4)) avoids an IndexError when fewer
        # projects than the limit are supplied.
        for i, project in enumerate(self.project_list[:max_projects]):
            print(project)
            java_file_list, test_case_list = self.read_tagged_files(
                self.tagged_file_path + project)
            for java_file, test_case in zip(java_file_list, test_case_list):
                self.read_java_file(self.prefix_path,
                                    self.project_github_name_path[i],
                                    java_file, test_case)

        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        self.df.to_csv(output_path, index=False)
        print('Done')

    def findAllFile(self, base):
        '''Yield the full path of every file found recursively under base.'''
        for root, _dirs, files in os.walk(base):
            for f in files:
                yield os.path.join(root, f)

    def read_tagged_files(self, tagged_file_path):
        '''
        Collect the Java file names and test case names of one project.

        Every ``.csv``/``.xlsx`` file under tagged_file_path is inspected.
        The Java file name is the part after the first underscore of the
        first dot-separated segment of the file name; the test case name is
        the second-to-last dot-separated segment. Files whose first segment
        has no underscore do not follow this convention and are skipped.

        BE CAREFUL: this collects all files within one project; the loop
        over projects belongs to the caller.

        Returns (java_files_list, test_case_list), two parallel lists.
        '''
        java_files_list = []
        test_case_list = []
        for _root, _dirs, files in os.walk(tagged_file_path):
            for name in files:
                if not name.endswith(('.csv', '.xlsx')):
                    continue
                segments = name.split('.')
                stem_parts = segments[0].split('_')
                if len(stem_parts) < 2:
                    # Not "<prefix>_<JavaFile>...": nothing to extract.
                    continue
                java_files_list.append(stem_parts[1])
                test_case_list.append(segments[-2])
        return java_files_list, test_case_list

    def read_java_file(self, prefix_path, github_project, java_file, test_case):
        '''
        Analyse test_case in every ``<java_file>.java`` of a project.

        All files under ``prefix_path/github_project`` are walked; for each
        one whose base name is ``java_file + '.java'`` the test case is
        analysed and, if at least one keyword was counted, a row is added
        to self.df. Returns None.
        '''
        target = java_file + '.java'
        for path in self.findAllFile(prefix_path + '/' + github_project):
            if os.path.basename(path) != target:
                continue
            # Explicit encoding so the result does not depend on the
            # platform default; undecodable bytes must not abort the scan.
            with open(path, 'r', encoding='utf-8', errors='replace') as f:
                lines = f.readlines()
            result = self.read_test_case(test_case, lines)
            if result[0]:
                # DataFrame.append was removed in pandas 2.0; enlarge via
                # .loc instead. Values are in COLUMNS order.
                self.df.loc[len(self.df)] = [github_project, java_file,
                                             test_case, *result[1:]]

    def read_test_case(self, test_case, lines):
        '''
        Count keyword-bearing lines inside one test method.

        Scanning starts at the first non-comment line that contains
        test_case as a whole identifier and stops once the numbers of `{`
        and `}` seen since then are equal and non-zero. Each line
        contributes at most 1 per keyword. Braces and keywords inside
        string literals or trailing comments are not distinguished from
        real code.

        Returns a tuple
        (found, if, else, for, while, try, catch, switch) where found is
        True when at least one count is non-zero.
        '''
        # Escape the name so regex metacharacters are literal, and require
        # identifier boundaries so `testFoo` does not match `testFooBar`.
        name_pattern = re.compile(
            r'(?<![\w$])' + re.escape(test_case) + r'(?![\w$])')
        counts = [0] * len(self.KEYWORDS)
        started = False
        opened = 0
        closed = 0
        for line in lines:
            if self._COMMENT_LINE.match(line):
                continue
            if not started:
                if not name_pattern.search(line):
                    continue
                started = True
            opened += line.count('{')
            closed += line.count('}')
            # Count before testing for the end of the method so that a
            # body written on the same line as its closing brace is seen.
            for idx, pattern in enumerate(self._KEYWORD_PATTERNS):
                if pattern.search(line):
                    counts[idx] += 1
            if opened == closed and opened != 0:
                break
        return (any(counts), *counts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment