Skip to content

Instantly share code, notes, and snippets.

@masudianpour
Created December 4, 2014 00:32
Show Gist options
  • Save masudianpour/0467e4f98c0e7f7f801e to your computer and use it in GitHub Desktop.
Save masudianpour/0467e4f98c0e7f7f801e to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import re
from collections import defaultdict
import time
'''
Report class.
Opens a log file, processes it and prints the result.
'''
class Report(object):
    """Collect per-endpoint statistics from a request log file.

    Constructing an instance immediately reads and parses the log (see
    ``start``); ``getResult`` then prints one summary table per endpoint.
    The code is written to run unchanged on Python 2 and Python 3.

    Attributes:
        file: path of the log file, exactly as passed to the constructor.
        report: dict keyed by endpoint label.  Each value is a dict with
            ``pattern`` (regex source used to spot the endpoint in a line),
            ``count`` (number of matching lines), ``responses`` (list of
            connect+service times in ms) and ``dynos`` (dyno name -> hits).
        malformed_lines: number of endpoint matches that were skipped
            because the line lacked a connect, service or dyno field.
    """

    # (label, regex source) pairs in reporting order.  The trailing space in
    # the last two patterns keeps them from also matching the longer
    # ``/api/users/<id>/...`` URLs above.
    _ENDPOINTS = (
        ('GET /api/users/{user_id}/count_pending_messages',
         r'GET (\w+=)/api/users/\d+/count_pending_messages'),
        ('GET /api/users/{user_id}/get_messages',
         r'GET (\w+=)/api/users/\d+/get_messages'),
        ('GET /api/users/{user_id}/get_friends_progress',
         r'GET (\w+=)/api/users/\d+/get_friends_progress'),
        ('GET /api/users/{user_id}/get_friends_score',
         r'GET (\w+=)/api/users/\d+/get_friends_score'),
        ('POST /api/users/{user_id}',
         r'POST (\w+=)/api/users/\d+ '),
        ('GET /api/users/{user_id}',
         r'GET (\w+=)/api/users/\d+ '),
    )

    # Field extractors, compiled once.  Each field must be surrounded by
    # single spaces, as in the original patterns.
    _CONNECT_RE = re.compile(r' connect=(\d+)ms ')
    _SERVICE_RE = re.compile(r' service=(\d+)ms ')
    _DYNO_RE = re.compile(r' dyno=(web\.\d+) ')  # '.' escaped: literal dot

    def __init__(self, file):
        """Set up empty statistics and process the log straight away.

        Args:
            file: path of the log file to analyse.

        Raises:
            SystemExit: if the log file cannot be opened (see ``start``).
        """
        self.file = file
        self.malformed_lines = 0
        self.report = {}
        for label, pattern in self._ENDPOINTS:
            self.report[label] = {
                'pattern': pattern,
                'count': 0,
                'responses': [],
                'dynos': {},
            }
        # Processing happens at construction time so callers only need to
        # call getResult() afterwards.
        self.start()

    def start(self):
        """Read the log file and fill ``self.report``.

        Every line is tested against each endpoint pattern.  On a match the
        endpoint's count, response-time list and per-dyno tally are updated.
        A matching line that lacks a connect, service or dyno field is
        skipped and counted in ``self.malformed_lines``.

        Raises:
            SystemExit: with status 1, after printing a message, when the
                file cannot be opened.
        """
        # Compile the endpoint patterns once instead of once per line.
        matchers = [(label, re.compile(stats['pattern']))
                    for label, stats in self.report.items()]
        try:
            log = open(self.file, 'r')
        except (IOError, OSError):
            # Only genuine open failures get the "cannot open" message;
            # parse problems are handled per line below.
            print("Can not open log file ({file}) [Please check file permission or file existency!]".format(file=self.file))
            raise SystemExit(1)

        with log:  # guarantees the handle is closed, even on error
            for line in log:
                for label, matcher in matchers:
                    if matcher.search(line) is None:
                        continue
                    # Parse before mutating so a bad line leaves the
                    # statistics untouched.
                    try:
                        elapsed = self.getResponseTime(line)
                        dyno = self.getDyno(line)
                    except ValueError:
                        self.malformed_lines += 1
                        continue
                    stats = self.report[label]
                    stats['count'] += 1
                    stats['responses'].append(elapsed)
                    stats['dynos'][dyno] = stats['dynos'].get(dyno, 0) + 1

    def getResponseTime(self, line):
        """Return connect + service time of a log line, in milliseconds.

        Args:
            line: one line of the log file.

        Returns:
            int: sum of the ``connect=<n>ms`` and ``service=<n>ms`` values.

        Raises:
            ValueError: if either field is missing from the line.
        """
        connect = self._CONNECT_RE.search(line)
        service = self._SERVICE_RE.search(line)
        if connect is None or service is None:
            raise ValueError("line has no connect/service time: %r" % (line,))
        return int(connect.group(1)) + int(service.group(1))

    def getDyno(self, line):
        """Return the dyno name (e.g. ``web.12``) recorded in a log line.

        Args:
            line: one line of the log file.

        Returns:
            str: the value of the ``dyno=web.<n>`` field.

        Raises:
            ValueError: if the line has no such field.
        """
        found = self._DYNO_RE.search(line)
        if found is None:
            raise ValueError("line has no dyno field: %r" % (line,))
        return found.group(1)

    def calculateMostRespondedDyno(self, items):
        """Return the dyno that served the most requests.

        Args:
            items: dict mapping dyno name to its hit count.

        Returns:
            The dyno name with the highest count, or ``0`` when ``items`` is
            empty.  Ties go to the name that sorts first, so the result does
            not depend on dict ordering.
        """
        if not items:
            return 0
        # max() keeps the first maximal element, hence the sorted() pass.
        return max(sorted(items), key=items.get)

    def calculateMode(self, items):
        """Return the most frequent response time, formatted as ``"<n> ms"``.

        Args:
            items: list of response times.

        Returns:
            ``0`` for an empty list, otherwise a string such as ``"12 ms"``.
            When several values share the top frequency the smallest one is
            reported, so the result is deterministic.
        """
        if not items:
            return 0
        tally = defaultdict(int)
        for value in items:
            tally[value] += 1
        top = max(tally.values())
        return "%d ms" % min(value for value, hits in tally.items() if hits == top)

    def calculateMedian(self, items):
        """Return the median response time, formatted as ``"<n> ms"``.

        Args:
            items: list of response times; it is NOT modified.

        Returns:
            ``0`` for an empty list, otherwise a string such as ``"12 ms"``.
            For an even number of items the two middle values are averaged
            and the ``%d`` format truncates any fractional part.
        """
        if not items:
            return 0
        # Sort a copy: the list belongs to self.report and must keep its
        # original (arrival) order.
        ordered = sorted(items)
        size = len(ordered)
        return "%d ms" % (0.5 * (ordered[(size - 1) // 2] + ordered[size // 2]))

    def calculateMean(self, items):
        """Return the mean response time, formatted as ``"<x.xxx> ms"``.

        Args:
            items: list of response times.

        Returns:
            str: mean with three decimals; ``"0.000 ms"`` for an empty list.
        """
        return "%.3f ms" % (sum(items) / float(len(items)) if items else 0)

    def getResult(self):
        """Print a formatted summary table for every tracked endpoint."""
        rule = "-" * 110
        for url, stats in self.report.items():
            times = stats['responses']
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("{url}".format(url=url))
            print(rule)
            print("Count\t Response Time(Mode) \t Response Time(Mean)\t Response Time(Median)\t\t Most Responded Dyno")
            print(rule)
            print("{count}\t\t{mode}\t\t\t{mean}\t\t{median}\t\t\t\t{mostRespondedDyno}".format(
                count=stats['count'],
                mean=self.calculateMean(times),
                mode=self.calculateMode(times),
                median=self.calculateMedian(times),
                mostRespondedDyno=self.calculateMostRespondedDyno(stats['dynos'])))
            print("=" * 110)
# Script driver: analyse 'sample.log' and print the report.
# The clock is started BEFORE constructing Report because the constructor is
# what reads and parses the log; starting it afterwards would time only the
# printing.
startTime = time.time()
rep = Report('sample.log')
# Print the per-endpoint summary tables.
rep.getResult()
# Single-argument print(...) works on both Python 2 and Python 3.
print("Execution time: {seconds} Seconds".format(seconds=time.time() - startTime))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment