Created
July 9, 2022 13:39
-
-
Save Codegass/8ea0b564277b9e733645dca423cc4ef8 to your computer and use it in GitHub Desktop.
python parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class McCabe():
    '''
    Count branching keywords inside tagged Java test cases.

    For every tagged (java file, test case) pair of each project, the matching
    ``.java`` file is located under the project's checkout, the body of the
    test case is scanned line by line, and the number of lines containing each
    of ``if``, ``else``, ``for``, ``while``, ``try``, ``catch`` and ``switch``
    is recorded in ``self.df``. Constructing the object runs the whole
    pipeline and writes the result table as CSV.

    NOTE(review): relies on module-level ``os``, ``re`` and ``pandas as pd``
    imports, which are not visible in this excerpt.
    '''

    # Keywords counted per test case; also the trailing columns of the result table.
    _KEYWORDS = ('if', 'else', 'for', 'while', 'try', 'catch', 'switch')
    # One whole-word pattern per keyword, compiled once instead of once per line.
    _KEYWORD_PATTERNS = tuple(re.compile(r'\b' + kw + r'\b') for kw in _KEYWORDS)
    # Lines whose first non-blank characters are `//` or `*` are treated as comments.
    _COMMENT_LINE = re.compile(r'\s*(?://|\*)')

    def __init__(self, prefix_path, tagged_file_path, project_list, project_github_name_path,
                 max_projects=4, output_path='./inbox/Complexity.csv'):
        '''
        Run the analysis for the given projects and write the CSV report.

        prefix_path: directory that contains one checkout per GitHub project.
        tagged_file_path: path prefix; each project name is appended to it
            (plain string concatenation) to get that project's tagged-file directory.
        project_list: project names, parallel to ``project_github_name_path``.
        project_github_name_path: checkout directory name of each project
            under ``prefix_path``.
        max_projects: process at most this many leading projects. The default
            of 4 keeps the historical limit; pass ``None`` to process all.
        output_path: where the CSV report is written.
        '''
        self.prefix_path = prefix_path
        self.tagged_file_path = tagged_file_path
        self.project_list = project_list
        self.project_github_name_path = project_github_name_path
        self.df = pd.DataFrame(columns=['project', 'file', 'test_case'] + list(self._KEYWORDS))

        # Pair the two parallel lists up front so a short list cannot raise IndexError.
        project_pairs = list(zip(project_list, project_github_name_path))
        if max_projects is not None:
            project_pairs = project_pairs[:max_projects]

        for project, github_project in project_pairs:
            print(project)
            java_file_list, test_case_list = self.read_tagged_files(self.tagged_file_path + project)
            for java_file, test_case in zip(java_file_list, test_case_list):
                self.read_java_file(self.prefix_path, github_project, java_file, test_case)

        # Make sure the target directory exists before writing the report.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        self.df.to_csv(output_path, index=False)
        print('Done')

    def findAllFile(self, base):
        '''
        Yield the full path of every file found recursively under ``base``.
        '''
        for root, _dirs, file_names in os.walk(base):
            for file_name in file_names:
                yield os.path.join(root, file_name)

    def read_tagged_files(self, tagged_file_path):
        '''
        Read the tagged files and get the java file name & test case name.

        BE CAREFUL: the design is to collect all files within one project.
        The project loop should be outside.

        Only ``.csv`` and ``.xlsx`` files are considered. The java file name is
        the second ``_``-separated piece of the text before the first dot; the
        test case name is the dot-separated piece right before the extension.
        Files whose leading piece contains no ``_`` do not follow that scheme
        and are skipped instead of aborting the whole run.

        Returns two parallel lists: (java file names, test case names).
        '''
        java_files_list = []
        test_case_list = []
        for _root, _dirs, files in os.walk(tagged_file_path):
            for name in files:
                if not name.endswith(('.csv', '.xlsx')):
                    continue
                dot_parts = name.split('.')
                prefix_parts = dot_parts[0].split('_')
                if len(prefix_parts) < 2:
                    continue
                java_files_list.append(prefix_parts[1])
                test_case_list.append(dot_parts[-2])
        return java_files_list, test_case_list

    def read_java_file(self, prefix_path, github_project, java_file, test_case):
        '''
        Analyse ``test_case`` in every ``<java_file>.java`` under the project.

        Each matching file is read and handed to ``read_test_case``; when at
        least one keyword was counted, a row is appended to ``self.df``.
        '''
        target_name = java_file + '.java'
        for file_path in self.findAllFile(prefix_path + '/' + github_project):
            if os.path.basename(file_path) != target_name:
                continue
            # Undecodable bytes are replaced so that one odd file cannot stop the run.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                lines = f.readlines()
            found, *counts = self.read_test_case(test_case, lines)
            if not found:
                continue
            row = {'project': github_project, 'file': java_file, 'test_case': test_case}
            row.update(zip(self._KEYWORDS, counts))
            row_frame = pd.DataFrame([row], columns=self.df.columns)
            # DataFrame.append was removed in pandas 2.0; concat is its replacement.
            if self.df.empty:
                self.df = row_frame
            else:
                self.df = pd.concat([self.df, row_frame], ignore_index=True)

    def read_test_case(self, test_case, lines):
        '''
        Count keyword-bearing lines inside the body of ``test_case``.

        Scanning starts at the first non-comment line that mentions
        ``test_case`` as a whole identifier and stops at the line where the
        running totals of ``{`` and ``}`` become equal (and non-zero); that
        closing line itself is not counted. Comment lines are ignored. Each
        keyword is counted at most once per line.

        NOTE(review): braces inside string or character literals are counted
        like any other brace, and ``/* ...`` opener lines are not treated as
        comments.

        Returns ``(found, if, else, for, while, try, catch, switch)`` where
        ``found`` is True when at least one count is non-zero.
        '''
        # Escape the name and forbid identifier characters on either side so
        # that e.g. `testAdd` does not start the scan at `testAddAll`.
        name_pattern = re.compile(r'(?<![\w$])' + re.escape(test_case) + r'(?![\w$])')
        counts = [0] * len(self._KEYWORD_PATTERNS)
        open_braces = 0
        close_braces = 0
        started = False
        for line in lines:
            if self._COMMENT_LINE.match(line):
                continue
            if not started:
                if not name_pattern.search(line):
                    continue
                started = True
            # Track the braces to detect where the test method ends.
            open_braces += line.count('{')
            close_braces += line.count('}')
            if open_braces == close_braces and open_braces != 0:
                break
            for idx, pattern in enumerate(self._KEYWORD_PATTERNS):
                if pattern.search(line):
                    counts[idx] += 1
        return (any(counts), *counts)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment