Skip to content

Instantly share code, notes, and snippets.

@public
Created February 17, 2014 09:40
Show Gist options
  • Select an option

  • Save public/9047618 to your computer and use it in GitHub Desktop.

Select an option

Save public/9047618 to your computer and use it in GitHub Desktop.

Revisions

  1. public created this gist Feb 17, 2014.
    101 changes: 101 additions & 0 deletions benchmark.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,101 @@
    import sys
    import time
    import hotshot
    import hotshot.stats
    import ipdb

    import openpyxl

    def get_process_rss():
        """Return the current process's memory size in bytes, or 0 if unknown.

        Scans ``/proc/self/status`` for the line starting with ``VmSize:`` and
        multiplies its numeric field by 1024 (procfs is assumed to report the
        value in kB -- confirm against proc(5)).

        Returns 0 when no ``VmSize:`` line is present.  Raises ``IOError`` if
        ``/proc/self/status`` cannot be opened (e.g. on non-Linux systems).

        NOTE(review): despite the function name, the field read here is
        ``VmSize`` (virtual size), not ``VmRSS`` (resident set size).  Confirm
        which measurement the benchmark is meant to report.
        """
        # The context manager guarantees the handle is closed, including on
        # the early return below; iterating the file avoids loading every
        # line into a list first.
        with open("/proc/self/status") as status_file:
            for line in status_file:
                if line.startswith("VmSize:"):
                    return int(line.split()[1]) * 1024
        return 0


    def highest_row(sheet):
        """Return whatever ``sheet.get_highest_row()`` reports for *sheet*."""
        top_row = sheet.get_highest_row()
        return top_row


    def highest_column(sheet):
        """Return the largest column index among *sheet*'s cells, minus one.

        Each cell in ``sheet._cells`` has its ``column`` attribute converted
        with ``openpyxl.cell.column_index_from_string``; the maximum of those
        numbers, less one, is returned.  If a ``ValueError`` is raised while
        computing the maximum (as ``max`` does for an empty sequence), the
        result is 1.

        NOTE(review): the fallback (1) is larger than the result for a sheet
        whose only populated column converts to 1 (which yields 0) -- confirm
        this asymmetry is intended.
        """
        try:
            # Built lazily so nothing is converted when there are no cells.
            column_numbers = (
                openpyxl.cell.column_index_from_string(entry.column)
                for entry in sheet._cells.itervalues()
            )
            rightmost = max(column_numbers)
        except ValueError:
            return 1
        return rightmost - 1


    def main(args):
    start = time.time()
    start_mem = get_process_rss()
    print "start", start, start_mem

    wb = openpyxl.load_workbook(args[1])

    opened = time.time()
    opened_mem = get_process_rss()
    print "open", opened-start, opened_mem-start_mem


    # We are going to go and count the numberof cells with values,
    # the number of cells within our bounding box, and the number of
    # cell.value accesses we do.

    values = 0
    cells = 0
    accesses = 0

    for sheet in wb.worksheets:
    rows = highest_row(sheet)
    columns = highest_column(sheet)

    for r in xrange(rows):
    blanks = 0

    for c in xrange(columns):
    cell = sheet.cell(row=r, column=c)

    if cell.value is not None:
    values += 1
    else:
    blanks += 1

    cell.offset(row=1, column=1).value

    accesses += 2
    cells += 1

    if blanks == c+1:
    break

    end = time.time()
    total = end-start
    done_mem = get_process_rss()

    print "read", total, cells, values, accesses, done_mem-start_mem

    cells = float(cells)
    accesses = float(accesses)
    values = float(values)

    print (total/accesses)*1000, "ms per cell access"
    print (done_mem-start_mem) / cells, "bytes per cell"
    print (done_mem-start_mem) / values, "bytes per value"


    # Script driver.  Change the 0 below to 1 to run the benchmark under the
    # hotshot profiler instead of running it directly.
    if not 0:
        # Default path: plain benchmark run with the command-line arguments.
        main(sys.argv)
    else:
        # Profile the run, writing the raw profile data to "speed.prof".
        prof = hotshot.Profile("speed.prof")
        prof.runcall(main, sys.argv)
        prof.close()

        # Load the recorded profile and order it by time, then call count.
        stats = hotshot.stats.load("speed.prof")
        stats.strip_dirs()
        stats.sort_stats('time', 'calls')

        # Drop into the debugger so `stats` can be inspected interactively.
        ipdb.set_trace()