Codegass · July 9, 2022 13:39
diff --git a/pythonparser.py b/pythonparser.py
 class McCabe():
     '''
     Count control-flow keywords inside tagged Java test methods.

     Instantiating the class runs the whole pipeline: for each project it
     collects (java file, test case) pairs from the tagged-file names, scans
     the matching ``.java`` sources, and writes one row per test case that
     contains at least one counted keyword to ``./inbox/Complexity.csv``.
     '''

     # CSV columns; the last seven are the counted Java keywords, in order.
     _COLUMNS = ('project', 'file', 'test_case',
                 'if', 'else', 'for', 'while', 'try', 'catch', 'switch')
     _KEYWORDS = _COLUMNS[3:]
     # Compiled once here instead of on every scanned line.
     _KEYWORD_PATTERNS = tuple(re.compile(r'\b%s\b' % kw) for kw in _KEYWORDS)
     # Lines that start with ``//``, ``/*`` or ``*`` are treated as comments.
     _COMMENT_LINE = re.compile(r'\s*(?://|/?\*)')
     _OUTPUT_PATH = './inbox/Complexity.csv'

     def __init__(self, prefix_path, tagged_file_path, project_list, project_github_name_path):
         '''
         Store the configuration, analyse every project and write the CSV.

         prefix_path: directory that contains the checked-out projects.
         tagged_file_path: string prefix that is concatenated with each
             project name to get that project's tagged-file directory.
         project_list: project names, parallel to project_github_name_path.
         project_github_name_path: per-project directory names under
             prefix_path.

         All projects are processed (pairs are formed with zip, so a length
         mismatch stops at the shorter sequence) instead of a fixed count.
         '''
         self.prefix_path = prefix_path
         self.tagged_file_path = tagged_file_path
         self.project_list = project_list
         self.project_github_name_path = project_github_name_path
         self.df = pd.DataFrame(columns=list(self._COLUMNS))

         for project, github_project in zip(project_list, project_github_name_path):
             print(project)
             java_files, test_cases = self.read_tagged_files(self.tagged_file_path + project)
             for java_file, test_case in zip(java_files, test_cases):
                 self.read_java_file(self.prefix_path, github_project, java_file, test_case)

         # to_csv does not create missing directories, so make sure it exists.
         os.makedirs(os.path.dirname(self._OUTPUT_PATH), exist_ok=True)
         self.df.to_csv(self._OUTPUT_PATH, index=False)
         print('Done')

     def findAllFile(self, base):
         '''
         Yield the full path of every file found recursively under base.
         '''
         for root, _dirs, filenames in os.walk(base):
             for filename in filenames:
                 yield os.path.join(root, filename)

     def read_tagged_files(self, tagged_file_path):
         '''
         Read tagged files and get the java file name & test case name.

         Only ``.csv`` / ``.xlsx`` files are considered. The java file name is
         the second ``_``-separated piece of the text before the first dot;
         the test case name is the dot-separated piece right before the
         extension. Files whose name has no ``_`` before the first dot do not
         follow that convention and are skipped.

         BE CAREFUL: the design is to collect all files within one project.
         The project loop should be outside.

         Returns two parallel lists: (java file names, test case names).
         '''
         java_files_list = []
         test_case_list = []
         for _root, _dirs, filenames in os.walk(tagged_file_path):
             for name in filenames:
                 if not name.endswith(('.csv', '.xlsx')):
                     continue
                 dot_parts = name.split('.')
                 stem_parts = dot_parts[0].split('_')
                 if len(stem_parts) < 2:
                     # No "<prefix>_<JavaFile>" stem: nothing to extract.
                     continue
                 java_files_list.append(stem_parts[1])
                 test_case_list.append(dot_parts[-2])
         return java_files_list, test_case_list

     def read_java_file(self, prefix_path, github_project, java_file, test_case):
         '''
         Scan every ``<java_file>.java`` under the project for the test case.

         Each matching source file is analysed with read_test_case; when at
         least one keyword was counted, a row is appended to self.df.
         '''
         target_name = java_file + '.java'
         for path in self.findAllFile(prefix_path + '/' + github_project):
             # basename works with whatever separator os.walk produced.
             if os.path.basename(path) != target_name:
                 continue
             # Undecodable bytes are replaced rather than aborting the run.
             with open(path, 'r', encoding='utf-8', errors='replace') as f:
                 lines = f.readlines()
             result = self.read_test_case(test_case, lines)
             if result[0]:
                 # DataFrame.append no longer exists in pandas 2.x; add the
                 # row by label instead (values follow the column order).
                 self.df.loc[len(self.df)] = [github_project, java_file, test_case] + list(result[1:])

     def read_test_case(self, test_case, lines):
         '''
         Count control-flow keywords in the body of one test method.

         Analysis starts at the first non-comment line that mentions
         test_case as a whole identifier (so ``testAdd`` does not start at
         ``testAddAll``) and stops after the line on which the number of
         ``{`` seen equals the number of ``}`` seen. Keywords on that last
         line are still counted, and every occurrence on a line counts.

         Returns (found, if, else, for, while, try, catch, switch) where
         found is True when at least one keyword was counted.
         '''
         # The name is escaped: it is literal text, not a regular expression.
         name_pattern = re.compile(r'(?<!\w)' + re.escape(test_case) + r'(?!\w)')
         counts = [0] * len(self._KEYWORDS)
         started = False
         opened = 0
         closed = 0

         for line in lines:
             if self._COMMENT_LINE.match(line):
                 continue
             if not started:
                 if not name_pattern.search(line):
                     continue
                 started = True

             for index, pattern in enumerate(self._KEYWORD_PATTERNS):
                 counts[index] += len(pattern.findall(line))

             # Balanced braces (after at least one opener) mark the method end.
             opened += line.count('{')
             closed += line.count('}')
             if opened and opened == closed:
                 break

         return (any(counts),) + tuple(counts)
	class McCabe():
	    '''
	    Count control-flow keywords inside tagged Java test methods.

	    Instantiating the class runs the whole pipeline: for each project it
	    collects (java file, test case) pairs from the tagged-file names, scans
	    the matching ``.java`` sources, and writes one row per test case that
	    contains at least one counted keyword to ``./inbox/Complexity.csv``.
	    '''

	    # CSV columns; the last seven are the counted Java keywords, in order.
	    _COLUMNS = ('project', 'file', 'test_case',
	                'if', 'else', 'for', 'while', 'try', 'catch', 'switch')
	    _KEYWORDS = _COLUMNS[3:]
	    # Compiled once here instead of on every scanned line.
	    _KEYWORD_PATTERNS = tuple(re.compile(r'\b%s\b' % kw) for kw in _KEYWORDS)
	    # Lines that start with ``//``, ``/*`` or ``*`` (after any amount of
	    # leading whitespace, including none) are treated as comments.
	    _COMMENT_LINE = re.compile(r'\s*(?://|/?\*)')
	    _OUTPUT_PATH = './inbox/Complexity.csv'

	    def __init__(self, prefix_path, tagged_file_path, project_list, project_github_name_path):
	        '''
	        Store the configuration, analyse every project and write the CSV.

	        prefix_path: directory that contains the checked-out projects.
	        tagged_file_path: string prefix that is concatenated with each
	            project name to get that project's tagged-file directory.
	        project_list: project names, parallel to project_github_name_path.
	        project_github_name_path: per-project directory names under
	            prefix_path.

	        All projects are processed (pairs are formed with zip, so a length
	        mismatch stops at the shorter sequence) instead of a fixed count.
	        '''
	        self.prefix_path = prefix_path
	        self.tagged_file_path = tagged_file_path
	        self.project_list = project_list
	        self.project_github_name_path = project_github_name_path
	        self.df = pd.DataFrame(columns=list(self._COLUMNS))

	        for project, github_project in zip(project_list, project_github_name_path):
	            print(project)
	            java_files, test_cases = self.read_tagged_files(self.tagged_file_path + project)
	            for java_file, test_case in zip(java_files, test_cases):
	                self.read_java_file(self.prefix_path, github_project, java_file, test_case)

	        # to_csv does not create missing directories, so make sure it exists.
	        os.makedirs(os.path.dirname(self._OUTPUT_PATH), exist_ok=True)
	        self.df.to_csv(self._OUTPUT_PATH, index=False)
	        print('Done')

	    def findAllFile(self, base):
	        '''
	        Yield the full path of every file found recursively under base.
	        '''
	        for root, _dirs, filenames in os.walk(base):
	            for filename in filenames:
	                yield os.path.join(root, filename)

	    def read_tagged_files(self, tagged_file_path):
	        '''
	        Read tagged files and get the java file name & test case name.

	        Only ``.csv`` / ``.xlsx`` files are considered. The java file name is
	        the second ``_``-separated piece of the text before the first dot;
	        the test case name is the dot-separated piece right before the
	        extension. Files whose name has no ``_`` before the first dot do not
	        follow that convention and are skipped.

	        BE CAREFUL: the design is to collect all files within one project.
	        The project loop should be outside.

	        Returns two parallel lists: (java file names, test case names).
	        '''
	        java_files_list = []
	        test_case_list = []
	        for _root, _dirs, filenames in os.walk(tagged_file_path):
	            for name in filenames:
	                if not name.endswith(('.csv', '.xlsx')):
	                    continue
	                dot_parts = name.split('.')
	                stem_parts = dot_parts[0].split('_')
	                if len(stem_parts) < 2:
	                    # No "<prefix>_<JavaFile>" stem: nothing to extract.
	                    continue
	                java_files_list.append(stem_parts[1])
	                test_case_list.append(dot_parts[-2])
	        return java_files_list, test_case_list

	    def read_java_file(self, prefix_path, github_project, java_file, test_case):
	        '''
	        Scan every ``<java_file>.java`` under the project for the test case.

	        Each matching source file is analysed with read_test_case; when at
	        least one keyword was counted, a row is appended to self.df.
	        '''
	        target_name = java_file + '.java'
	        for path in self.findAllFile(prefix_path + '/' + github_project):
	            # basename works with whatever separator os.walk produced.
	            if os.path.basename(path) != target_name:
	                continue
	            # Undecodable bytes are replaced rather than aborting the run.
	            with open(path, 'r', encoding='utf-8', errors='replace') as f:
	                lines = f.readlines()
	            result = self.read_test_case(test_case, lines)
	            if result[0]:
	                # DataFrame.append no longer exists in pandas 2.x; add the
	                # row by label instead (values follow the column order).
	                self.df.loc[len(self.df)] = [github_project, java_file, test_case] + list(result[1:])

	    def read_test_case(self, test_case, lines):
	        '''
	        Count control-flow keywords in the body of one test method.

	        Analysis starts at the first non-comment line that mentions
	        test_case as a whole identifier (so ``testAdd`` does not start at
	        ``testAddAll``) and stops after the line on which the number of
	        ``{`` seen equals the number of ``}`` seen. Keywords on that last
	        line are still counted, and every occurrence on a line counts.

	        Returns (found, if, else, for, while, try, catch, switch) where
	        found is True when at least one keyword was counted.
	        '''
	        # The name is escaped: it is literal text, not a regular expression.
	        name_pattern = re.compile(r'(?<!\w)' + re.escape(test_case) + r'(?!\w)')
	        counts = [0] * len(self._KEYWORDS)
	        started = False
	        opened = 0
	        closed = 0

	        for line in lines:
	            if self._COMMENT_LINE.match(line):
	                continue
	            if not started:
	                if not name_pattern.search(line):
	                    continue
	                started = True

	            for index, pattern in enumerate(self._KEYWORD_PATTERNS):
	                counts[index] += len(pattern.findall(line))

	            # Balanced braces (after at least one opener) mark the method end.
	            opened += line.count('{')
	            closed += line.count('}')
	            if opened and opened == closed:
	                break

	        return (any(counts),) + tuple(counts)
No results found