Skip to content

Instantly share code, notes, and snippets.

@masudianpour
Created December 4, 2014 00:32
Show Gist options
  • Save masudianpour/0467e4f98c0e7f7f801e to your computer and use it in GitHub Desktop.
Save masudianpour/0467e4f98c0e7f7f801e to your computer and use it in GitHub Desktop.

Revisions

  1. masudianpour revised this gist Dec 4, 2014. 1 changed file with 0 additions and 12 deletions.
    12 changes: 0 additions & 12 deletions report.py
    Original file line number Diff line number Diff line change
    @@ -219,15 +219,3 @@ def getResult(self):
    # Print the formatted report held by `rep` (created outside this hunk).
    rep.getResult()
    # Seconds elapsed since `startTime`, which is set outside this hunk.
    print "Execution time: {seconds} Seconds".format(seconds=time.time()-startTime)

    # Purely decorative: print an ASCII-art banner after the report.
    print """
    ______ _ _ _ _
    | ___ \ | | | | | | |
    | |_/ / | __ _ _ _ | | __ _| |__ | |
    | __/| |/ _` | | | | | | / _` | '_ \ | |
    | | | | (_| | |_| | | |___| (_| | |_) | |_|
    \_| |_|\__,_|\__, | \_____/\__,_|_.__/ (_)
    __/ |
    |___/
    """
  2. masudianpour created this gist Dec 4, 2014.
    233 changes: 233 additions & 0 deletions report.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,233 @@
    #!/usr/bin/python
    import re
    from collections import defaultdict
    import time

    '''
    Report Class
    To open a log file, process it and print result
    '''
    class Report(object):
        """Summarise API endpoint traffic found in a log file.

        The log named by ``file`` is scanned once, at construction time. For
        every tracked endpoint the report records how many lines matched, the
        response time of each match (connect + service, in ms) and how many
        matches each dyno served.

        Attributes:
            file: path of the log file.
            report: dict keyed by endpoint label; each value is a dict with
                ``pattern`` (regex source used to spot the endpoint in a line),
                ``count`` (number of matching lines), ``responses`` (list of
                int response times) and ``dynos`` (dyno name -> match count).
        """

        # Endpoint label -> regex source used to recognise it in a log line.
        # "(\w+=)" consumes the "key=" prefix that sits between the HTTP verb
        # and the URL (presumably "path=" -- confirm against the log format).
        # The trailing space in the last two patterns keeps them from also
        # matching the longer /api/users/<id>/<action> URLs.
        _ENDPOINTS = (
            ('GET /api/users/{user_id}/count_pending_messages',
             r'GET (\w+=)/api/users/\d+/count_pending_messages'),
            ('GET /api/users/{user_id}/get_messages',
             r'GET (\w+=)/api/users/\d+/get_messages'),
            ('GET /api/users/{user_id}/get_friends_progress',
             r'GET (\w+=)/api/users/\d+/get_friends_progress'),
            ('GET /api/users/{user_id}/get_friends_score',
             r'GET (\w+=)/api/users/\d+/get_friends_score'),
            ('POST /api/users/{user_id}', r'POST (\w+=)/api/users/\d+ '),
            ('GET /api/users/{user_id}', r'GET (\w+=)/api/users/\d+ '),
        )

        # Field extractors, compiled once for the whole class.
        _CONNECT_RE = re.compile(r' connect=(\d+)ms ')
        _SERVICE_RE = re.compile(r' service=(\d+)ms ')
        _DYNO_RE = re.compile(r' dyno=(web.\d+) ')

        def __init__(self, file):
            """Build an empty report and immediately process the log.

            Args:
                file: path of the log file to read.

            Raises:
                SystemExit: if the log cannot be read or parsed (see start()).
            """
            self.file = file
            self.report = {}
            for label, pattern in self._ENDPOINTS:
                self.report[label] = {
                    'pattern': pattern,
                    'count': 0,
                    'responses': [],
                    'dynos': {},
                }
            # Processing starts as part of construction.
            self.start()

        def start(self):
            """Read the log file and fill ``self.report``.

            Each line is tested against every endpoint pattern; on a match the
            endpoint's count, response-time list and per-dyno tally are updated.

            Raises:
                SystemExit: with status 1 after printing a message, when the
                    file cannot be opened/read or a matching line lacks the
                    connect/service/dyno fields.
            """
            # Compile once instead of once per (line, endpoint) pair.
            compiled = [(label, re.compile(entry['pattern']))
                        for label, entry in self.report.items()]
            try:
                # The context manager closes the file even if parsing fails;
                # iterating the handle avoids loading the whole log in memory.
                with open(self.file, 'r') as log_file:
                    for line in log_file:
                        for label, pattern in compiled:
                            if pattern.search(line) is None:
                                continue
                            # Parse before mutating so a bad line never leaves
                            # a half-updated entry behind.
                            response_time = self.getResponseTime(line)
                            dyno = self.getDyno(line)
                            entry = self.report[label]
                            entry['count'] += 1
                            entry['responses'].append(response_time)
                            entry['dynos'][dyno] = entry['dynos'].get(dyno, 0) + 1
            except (IOError, OSError):
                # Only genuine I/O failures get the "can not open" message.
                print("Can not open log file ({file}) [Please check file permission or file existency!]".format(file=self.file))
                raise SystemExit(1)
            except ValueError as error:
                # Malformed content is reported as such rather than as an
                # open failure.
                print("Can not parse log file ({file}): {reason}".format(file=self.file, reason=error))
                raise SystemExit(1)

        def getResponseTime(self, line):
            """Return the response time of a log line.

            Args:
                line: one line of the log file.

            Returns:
                int: connect time plus service time, both read from the
                ``connect=<n>ms`` and ``service=<n>ms`` fields.

            Raises:
                ValueError: if either field is missing from the line.
            """
            connect = self._CONNECT_RE.search(line)
            service = self._SERVICE_RE.search(line)
            if connect is None or service is None:
                raise ValueError("missing connect=/service= field in log line: %r" % (line,))
            return int(connect.group(1)) + int(service.group(1))

        def getDyno(self, line):
            """Return the dyno named in a log line.

            Args:
                line: one line of the log file.

            Returns:
                str: the value of the ``dyno=`` field, e.g. ``web.12``.

            Raises:
                ValueError: if the line has no ``dyno=web.<n>`` field.
            """
            match = self._DYNO_RE.search(line)
            if match is None:
                raise ValueError("missing dyno= field in log line: %r" % (line,))
            return match.group(1)

        def calculateMostRespondedDyno(self, items):
            """Return the dyno that served the most requests.

            Args:
                items: dict mapping dyno name to its occurrence count.

            Returns:
                The dyno name with the highest count, or the int 0 when
                ``items`` is empty. On a tie, the first such dyno in the dict's
                iteration order wins.
            """
            if not items:
                return 0
            return max(items, key=items.get)

        def calculateMode(self, items):
            """Return the most frequent response time.

            Args:
                items: list of response times.

            Returns:
                The mode formatted as ``"<n> ms"``, or the int 0 when ``items``
                is empty. If several values share the top frequency, the first
                one in the tally's iteration order is reported.
            """
            if not items:
                return 0
            count = defaultdict(int)
            for value in items:
                count[value] += 1
            return "%d ms" % max(count, key=count.get)

        def calculateMedian(self, items):
            """Return the median response time.

            Args:
                items: list of response times; it is not modified.

            Returns:
                The median formatted as ``"<n> ms"`` (the ``%d`` format drops
                any fractional part), or the int 0 when ``items`` is empty.
            """
            if not items:
                return 0
            # Sort a copy: the caller's list is the report's own data.
            ordered = sorted(items)
            size = len(ordered)
            # For odd sizes both indexes are the middle element; for even
            # sizes they are the two middle elements.
            return "%d ms" % (0.5 * (ordered[(size - 1) // 2] + ordered[size // 2]))

        def calculateMean(self, items):
            """Return the mean response time.

            Args:
                items: list of response times.

            Returns:
                str: the mean formatted as ``"<x.xxx> ms"``; ``"0.000 ms"``
                when ``items`` is empty.
            """
            return "%.3f ms" % (sum(items) / float(len(items)) if items else 0)

        def getResult(self):
            """Print one formatted summary table per tracked endpoint."""
            rule = "-" * 110
            for url, entry in self.report.items():
                responses = entry['responses']
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("{url}".format(url=url))
                print(rule)
                print("Count\t Response Time(Mode) \t Response Time(Mean)\t Response Time(Median)\t\t Most Responded Dyno")
                print(rule)
                print("{count}\t\t{mode}\t\t\t{mean}\t\t{median}\t\t\t\t{mostRespondedDyno}".format(
                    count=entry['count'],
                    mean=self.calculateMean(responses),
                    mode=self.calculateMode(responses),
                    median=self.calculateMedian(responses),
                    mostRespondedDyno=self.calculateMostRespondedDyno(entry['dynos'])))
                print("=" * 110)






    # Start the clock before constructing Report: the constructor reads and
    # processes the whole log, so timing only getResult() would leave the
    # actual processing out of the reported execution time.
    startTime=time.time()
    rep=Report('sample.log')
    # Print the per-endpoint report, then the elapsed wall-clock time.
    rep.getResult()
    print("Execution time: {seconds} Seconds".format(seconds=time.time()-startTime))

    # Purely decorative ASCII-art banner. The raw string has the same value as
    # the plain literal (none of its backslashes form a valid escape) but does
    # not trigger invalid-escape warnings.
    print(r"""
    ______ _ _ _ _
    | ___ \ | | | | | | |
    | |_/ / | __ _ _ _ | | __ _| |__ | |
    | __/| |/ _` | | | | | | / _` | '_ \ | |
    | | | | (_| | |_| | | |___| (_| | |_) | |_|
    \_| |_|\__,_|\__, | \_____/\__,_|_.__/ (_)
    __/ |
    |___/
    """)