Skip to content

Instantly share code, notes, and snippets.

@cryptoscopia
Created January 29, 2018 09:31
Show Gist options
  • Save cryptoscopia/bfc424fe73f007d0eb0437a4f50e63c4 to your computer and use it in GitHub Desktop.
Save cryptoscopia/bfc424fe73f007d0eb0437a4f50e63c4 to your computer and use it in GitHub Desktop.

Revisions

  1. cryptoscopia created this gist Jan 29, 2018.
    90 changes: 90 additions & 0 deletions pwoh_graph.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,90 @@
    #!/usr/bin/env python

    from __future__ import print_function
    import argparse
    from collections import OrderedDict
    import csv
    from datetime import datetime, timedelta
    from decimal import Decimal

    parser = argparse.ArgumentParser()
    parser.add_argument('incoming', help='CSV export of transactions')
    parser.add_argument('outgoing', help='CSV export of internal transactions')
    args = parser.parse_args()

    # A place to gather the incoming transaction data
    data_in = OrderedDict()

    with open(args.incoming) as f:
    incoming = csv.reader(f)
    # Skip CSV header row
    next(incoming)
    for _,_,timestamp,_,_,_,_,value_in,value_out,_,_,_,_,_,_ in incoming:
    # Sanity check, just in case
    assert not float(value_out)
    # Round the timestamp down to the last 10-minute mark
    timestamp = datetime.utcfromtimestamp(int(timestamp))
    rounded_minute = timestamp.minute // 10 * 10
    timestamp = timestamp.replace(minute=rounded_minute, second=0, microsecond=0)
    # First parsed row, store initial record
    if not data_in:
    first_timestamp = timestamp
    data_in[timestamp] = Decimal(value_in)
    last_timestamp = timestamp
    continue
    # We've already had a transaction for this 10-minute period, add this one to it
    if timestamp in data_in:
    data_in[timestamp] += Decimal(value_in)
    last_timestamp = timestamp
    continue
    # Fill in 10-minute periods without transactions with the total from
    # the previous period (probably unnecessary, but avoids gaps)
    while last_timestamp + timedelta(minutes=10) < timestamp:
    data_in[last_timestamp + timedelta(minutes=10)] = data_in[last_timestamp]
    last_timestamp += timedelta(minutes=10)
    # And now add the new 10-minute period with the cumulative total
    data_in[timestamp] = data_in[last_timestamp] + Decimal(value_in)
    last_timestamp = timestamp

    # This is where we'll gather the outgoing transaction data
    data_out = OrderedDict()
    # Start it off with 0 at the time of the first incoming transaction
    data_out[first_timestamp] = Decimal('0.0')
    # The reason I keep adding these silly *_timestamp variables is because I can't
    # do data_in.keys()[0] or data_in.keys()[-1], and I don't want to keep casting
    # them into lists.
    last_out_timestamp = first_timestamp

    with open(args.outgoing) as f:
    outgoing = csv.reader(f)
    # Skip CSV header row
    next(outgoing)
    for _,_,timestamp,_,_,_,_,value_in,value_out,_,_,_,_,_ in outgoing:
    # Sanity check, just in case
    assert not float(value_in)
    # Round the timestamp down to the last 10-minute mark
    timestamp = datetime.utcfromtimestamp(int(timestamp))
    rounded_minute = timestamp.minute // 10 * 10
    timestamp = timestamp.replace(minute=rounded_minute, second=0, microsecond=0)
    # We've already had a transaction for this 10-minute period, add this one to it
    if timestamp in data_out:
    data_out[timestamp] += Decimal(value_out)
    last_out_timestamp = timestamp
    continue
    # Fill in 10-minute periods without transactions as we did for incoming
    while last_out_timestamp + timedelta(minutes=10) < timestamp:
    data_out[last_out_timestamp + timedelta(minutes=10)] = data_out[last_out_timestamp]
    last_out_timestamp += timedelta(minutes=10)
    # And now add the new 10-minute period with the cumulative total
    data_out[timestamp] = data_out[last_out_timestamp] + Decimal(value_in)
    last_out_timestamp = timestamp

    # If the outgoing transactions end more than 10 minutes before
    # the incoming ones, fill in the empty periods at the end
    while last_out_timestamp < last_timestamp:
    data_out[last_out_timestamp + timedelta(minutes=10)] = data_out[last_out_timestamp]
    last_out_timestamp += timedelta(minutes=10)

    # Output our data in CSV format
    for k, v in data_in.items():
    print('%s,%s,%s' % (k, v, data_out[k]))