Created
January 29, 2018 09:31
-
-
Save cryptoscopia/bfc424fe73f007d0eb0437a4f50e63c4 to your computer and use it in GitHub Desktop.
Revisions
-
cryptoscopia created this gist
Jan 29, 2018 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,90 @@ #!/usr/bin/env python from __future__ import print_function import argparse from collections import OrderedDict import csv from datetime import datetime, timedelta from decimal import Decimal parser = argparse.ArgumentParser() parser.add_argument('incoming', help='CSV export of transactions') parser.add_argument('outgoing', help='CSV export of internal transactions') args = parser.parse_args() # A place to gather the incoming transaction data data_in = OrderedDict() with open(args.incoming) as f: incoming = csv.reader(f) # Skip CSV header row next(incoming) for _,_,timestamp,_,_,_,_,value_in,value_out,_,_,_,_,_,_ in incoming: # Sanity check, just in case assert not float(value_out) # Round the timestamp down to the last 10-minute mark timestamp = datetime.utcfromtimestamp(int(timestamp)) rounded_minute = timestamp.minute // 10 * 10 timestamp = timestamp.replace(minute=rounded_minute, second=0, microsecond=0) # First parsed row, store initial record if not data_in: first_timestamp = timestamp data_in[timestamp] = Decimal(value_in) last_timestamp = timestamp continue # We've already had a transaction for this 10-minute period, add this one to it if timestamp in data_in: data_in[timestamp] += Decimal(value_in) last_timestamp = timestamp continue # Fill in 10-minute periods without transactions with the total from # the previous period (probably unnecessary, but avoids gaps) while last_timestamp + timedelta(minutes=10) < timestamp: data_in[last_timestamp + timedelta(minutes=10)] = data_in[last_timestamp] last_timestamp += timedelta(minutes=10) # And now add the new 10-minute period with the cumulative total data_in[timestamp] = data_in[last_timestamp] + Decimal(value_in) last_timestamp = timestamp # This is where we'll gather the outgoing transaction data data_out = OrderedDict() # Start it off with 0 at the time of the first incoming transaction data_out[first_timestamp] = Decimal('0.0') # The reason I keep adding these silly *_timestamp variables is because I can't # do data_in.keys()[0] or data_in.keys()[-1], and I don't want to keep casting # them into lists. last_out_timestamp = first_timestamp with open(args.outgoing) as f: outgoing = csv.reader(f) # Skip CSV header row next(outgoing) for _,_,timestamp,_,_,_,_,value_in,value_out,_,_,_,_,_ in outgoing: # Sanity check, just in case assert not float(value_in) # Round the timestamp down to the last 10-minute mark timestamp = datetime.utcfromtimestamp(int(timestamp)) rounded_minute = timestamp.minute // 10 * 10 timestamp = timestamp.replace(minute=rounded_minute, second=0, microsecond=0) # We've already had a transaction for this 10-minute period, add this one to it if timestamp in data_out: data_out[timestamp] += Decimal(value_out) last_out_timestamp = timestamp continue # Fill in 10-minute periods without transactions as we did for incoming while last_out_timestamp + timedelta(minutes=10) < timestamp: data_out[last_out_timestamp + timedelta(minutes=10)] = data_out[last_out_timestamp] last_out_timestamp += timedelta(minutes=10) # And now add the new 10-minute period with the cumulative total data_out[timestamp] = data_out[last_out_timestamp] + Decimal(value_in) last_out_timestamp = timestamp # If the outgoing transactions end more than 10 minutes before # the incoming ones, fill in the empty periods at the end while last_out_timestamp < last_timestamp: data_out[last_out_timestamp + timedelta(minutes=10)] = data_out[last_out_timestamp] last_out_timestamp += timedelta(minutes=10) # Output our data in CSV format for k, v in data_in.items(): print('%s,%s,%s' % (k, v, data_out[k]))