Created
February 17, 2014 09:40
-
-
Save public/9047618 to your computer and use it in GitHub Desktop.
Revisions
-
public created this gist
Feb 17, 2014. There are no files selected for viewing
# Benchmark for openpyxl: measures how long it takes to load a workbook,
# how long each cell access takes, and how much the process's memory grows.
#
# Usage: python <this script> WORKBOOK
#
# Written for Python 2 (xrange, dict.itervalues, hotshot).
import sys
import time
import hotshot
import hotshot.stats

import ipdb
import openpyxl

# Flip to True to run main() under the hotshot profiler and then drop into
# an ipdb session with the loaded stats, instead of running main() directly.
PROFILE = False


def get_process_rss():
    """Return the ``VmSize`` figure from /proc/self/status, times 1024.

    The numeric field of the ``VmSize:`` line is multiplied by 1024
    (the field is taken to be in kB, so the result is in bytes).
    Returns 0 when no ``VmSize:`` line is present.

    NOTE(review): despite the function name, this reads VmSize rather than
    VmRSS. Left unchanged so reported numbers stay comparable -- confirm
    which figure is actually wanted.
    """
    # The context manager closes the handle even on the early return.
    with open("/proc/self/status") as status:
        for line in status:
            if line.startswith("VmSize:"):
                return int(line.split()[1]) * 1024
    return 0


def highest_row(sheet):
    """Return whatever ``sheet.get_highest_row()`` reports."""
    return sheet.get_highest_row()


def highest_column(sheet):
    """Return the largest column index among the sheet's cells, minus one.

    Column letters are converted with
    ``openpyxl.cell.column_index_from_string``. If a ValueError is raised
    (presumably ``max()`` over a sheet with no cells), 1 is returned.
    """
    try:
        return max(
            openpyxl.cell.column_index_from_string(cell.column)
            for cell in sheet._cells.itervalues()
        ) - 1
    except ValueError:
        return 1


def _per(amount, count):
    """Return ``amount / count`` as a float, or NaN when ``count`` is zero."""
    if not count:
        return float("nan")
    return amount / float(count)


def main(args):
    """Load the workbook named by ``args[1]`` and print timing/memory stats.

    Prints four kinds of lines to stdout:
      * "start": start timestamp and starting memory figure
      * "open":  seconds taken by ``load_workbook`` and memory growth
      * "read":  total seconds, cells visited, non-empty values, number of
                 ``.value`` accesses, and total memory growth
      * per-access / per-cell / per-value ratios (NaN when the divisor is 0)

    args -- an argv-style sequence; ``args[1]`` is the workbook path.
    """
    start = time.time()
    start_mem = get_process_rss()
    print("start %s %s" % (start, start_mem))

    wb = openpyxl.load_workbook(args[1])
    opened = time.time()
    opened_mem = get_process_rss()
    print("open %s %s" % (opened - start, opened_mem - start_mem))

    # We are going to go and count the number of cells with values,
    # the number of cells within our bounding box, and the number of
    # cell.value accesses we do.
    values = 0
    cells = 0
    accesses = 0
    for sheet in wb.worksheets:
        rows = highest_row(sheet)
        columns = highest_column(sheet)
        for r in xrange(rows):
            blanks = 0
            for c in xrange(columns):
                cell = sheet.cell(row=r, column=c)
                if cell.value is not None:
                    values += 1
                else:
                    blanks += 1
                # Second access per cell: read the value of the cell one
                # row and one column over; the result itself is discarded.
                cell.offset(row=1, column=1).value
                accesses += 2
                cells += 1
            # A row with nothing but blanks ends the scan of this sheet.
            # Compare against `columns` rather than the leaked loop
            # variable `c`, which is undefined (or stale from a previous
            # sheet) when `columns` is 0.
            if blanks == columns:
                break

    end = time.time()
    total = end - start
    done_mem = get_process_rss()
    grown = done_mem - start_mem
    print("read %s %s %s %s %s" % (total, cells, values, accesses, grown))

    # _per() guards the divisions so an empty workbook reports NaN instead
    # of raising ZeroDivisionError.
    print("%s ms per cell access" % (_per(total, accesses) * 1000))
    print("%s bytes per cell" % _per(grown, cells))
    print("%s bytes per value" % _per(grown, values))


if PROFILE:
    prof = hotshot.Profile("speed.prof")
    prof.runcall(main, sys.argv)
    prof.close()
    stats = hotshot.stats.load("speed.prof")
    stats.strip_dirs()
    stats.sort_stats('time', 'calls')
    # Leave the developer in a debugger with `stats` in scope to inspect.
    ipdb.set_trace()
else:
    main(sys.argv)