Skip to content

Instantly share code, notes, and snippets.

@Codegass
Created July 9, 2022 13:39
Show Gist options
  • Select an option

  • Save Codegass/8ea0b564277b9e733645dca423cc4ef8 to your computer and use it in GitHub Desktop.

Select an option

Save Codegass/8ea0b564277b9e733645dca423cc4ef8 to your computer and use it in GitHub Desktop.

Revisions

  1. Codegass created this gist Jul 9, 2022.
    109 changes: 109 additions & 0 deletions pythonparser.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,109 @@
    class McCabe():
        '''
        Count branching keywords inside tagged Java test cases.

        For every project, the tagged-file directory is scanned to learn which
        Java file / test case pairs to analyse, the matching ``.java`` sources
        are located under the project checkout, and the number of lines that
        contain each of ``if``, ``else``, ``for``, ``while``, ``try``,
        ``catch`` and ``switch`` is recorded in ``self.df``.
        '''

        # Keyword columns, in the order used by the result tuple and the CSV.
        _KEYWORDS = ('if', 'else', 'for', 'while', 'try', 'catch', 'switch')
        _COLUMNS = ('project', 'file', 'test_case') + _KEYWORDS
        # One whole-word pattern per keyword, compiled once for all calls.
        _KEYWORD_PATTERNS = tuple(re.compile(r'\b' + kw + r'\b') for kw in _KEYWORDS)
        # Lines starting with ``//`` or ``*`` are treated as comments.
        _COMMENT_LINE = re.compile(r'\s*(?://|\*)')
        _OUTPUT_CSV = './inbox/Complexity.csv'

        def __init__(self, prefix_path, tagged_file_path, project_list,
                     project_github_name_path, max_projects=4):
            '''
            Run the whole analysis and write the result to
            ``./inbox/Complexity.csv``.

            prefix_path: directory that contains the project checkouts.
            tagged_file_path: prefix that, concatenated with a project name,
                gives the directory holding that project's tagged files.
            project_list: project names (used to find the tagged files).
            project_github_name_path: checkout directory name of each project,
                parallel to ``project_list``.
            max_projects: how many leading projects to process. Defaults to 4,
                the count that used to be hard-coded; ``None`` processes all.
                Fewer available projects no longer raise ``IndexError``.
            '''
            self.prefix_path = prefix_path
            self.tagged_file_path = tagged_file_path
            self.project_list = project_list
            self.project_github_name_path = project_github_name_path
            self.df = pd.DataFrame(columns=list(self._COLUMNS))

            # Slicing clamps to the available length (and [:None] keeps all).
            selected = zip(project_list[:max_projects],
                           project_github_name_path[:max_projects])
            for project, github_project in selected:
                print(project)

                java_file_list, test_case_list = self.read_tagged_files(
                    self.tagged_file_path + project)

                for java_file, test_case in zip(java_file_list, test_case_list):
                    self.read_java_file(self.prefix_path, github_project,
                                        java_file, test_case)

            # to_csv does not create missing directories.
            os.makedirs(os.path.dirname(self._OUTPUT_CSV), exist_ok=True)
            self.df.to_csv(self._OUTPUT_CSV, index=False)
            print('Done')

        def findAllFile(self, base):
            '''
            Yield the path of every file found below ``base`` (recursively).
            '''
            for root, _dirs, files in os.walk(base):
                for name in files:
                    yield os.path.join(root, name)

        def read_tagged_files(self, tagged_file_path):
            '''
            Read the tagged files and get the file name & test case name.

            BE CAREFUL: the design is to collect all files within one project.
            The project loop should be outside.

            Only ``.csv`` and ``.xlsx`` files are considered. The Java file
            name is the second ``_``-separated piece of the text before the
            first dot; the test case is the dot-separated piece just before
            the extension. A name without ``_`` before its first dot raises
            ``IndexError``.

            Returns two parallel lists: ``(java_files, test_cases)``.
            '''
            java_files_list = []
            test_case_list = []
            for _root, _dirs, files in os.walk(tagged_file_path):
                for name in files:
                    if not name.endswith(('.csv', '.xlsx')):
                        continue
                    pieces = name.split('.')
                    java_files_list.append(pieces[0].split('_')[1])
                    test_case_list.append(pieces[-2])
            return java_files_list, test_case_list

        def read_java_file(self, prefix_path, github_project, java_file, test_case):
            '''
            Analyse ``test_case`` in every ``<java_file>.java`` found under
            ``prefix_path/github_project`` and add one row to ``self.df`` for
            each file in which at least one keyword was counted.
            '''
            wanted = java_file + '.java'
            for path in self.findAllFile(prefix_path + '/' + github_project):
                # basename instead of split('/') so Windows paths work too.
                if os.path.basename(path) != wanted:
                    continue
                # Explicit encoding: the platform default may not be UTF-8, and
                # one undecodable byte should not abort the whole run.
                with open(path, 'r', encoding='utf-8', errors='replace') as f:
                    lines = f.readlines()
                result = self.read_test_case(test_case, lines)
                if result[0]:
                    # DataFrame.append was removed in pandas 2.0; enlarge via
                    # .loc instead. This relies on self.df keeping its default
                    # 0..n-1 index, so len(self.df) is always a new label.
                    self.df.loc[len(self.df)] = (
                        [github_project, java_file, test_case] + list(result[1:]))

        def read_test_case(self, test_case, lines):
            '''
            Count keyword lines in the body of ``test_case``.

            Scanning starts at the first non-comment line that mentions
            ``test_case`` as a whole identifier and stops at the line on which
            the numbers of ``{`` and ``}`` seen since then become equal (and
            non-zero). Each keyword is counted at most once per line.

            Returns ``(found, if, else, for, while, try, catch, switch)`` where
            ``found`` is True when any count is non-zero.
            '''
            # Escape the name so regex metacharacters (e.g. ``$``) are literal,
            # and require identifier boundaries so ``testFoo`` does not match
            # inside ``testFooBar``.
            name_pattern = re.compile(
                r'(?<![\w$])' + re.escape(test_case) + r'(?![\w$])')

            counts = [0] * len(self._KEYWORDS)
            opened = 0
            closed = 0
            in_test = False

            for line in lines:
                if self._COMMENT_LINE.match(line):  # ignore comments
                    continue
                if not in_test:
                    if not name_pattern.search(line):
                        continue
                    in_test = True

                # Track the braces to know where the test ends.
                opened += line.count('{')
                closed += line.count('}')

                # Count before checking for the end so that keywords on the
                # closing line (e.g. a one-line method body) are not lost.
                for position, pattern in enumerate(self._KEYWORD_PATTERNS):
                    if pattern.search(line):
                        counts[position] += 1

                if opened == closed and opened != 0:
                    break

            return (any(counts),) + tuple(counts)