#!/usr/bin/env python
from __future__ import print_function

import argparse
from collections import OrderedDict
import csv
from datetime import datetime, timedelta
from decimal import Decimal

parser = argparse.ArgumentParser()
parser.add_argument('incoming', help='CSV export of transactions')
parser.add_argument('outgoing', help='CSV export of internal transactions')
args = parser.parse_args()

# A place to gather the incoming transaction data
data_in = OrderedDict()

with open(args.incoming) as f:
    incoming = csv.reader(f)
    # Skip CSV header row
    next(incoming)
    for _, _, timestamp, _, _, _, _, value_in, value_out, _, _, _, _, _, _ in incoming:
        # Sanity check, just in case: incoming rows should carry no outgoing value
        assert not float(value_out)

        # Round the timestamp down to the last 10-minute mark
        timestamp = datetime.utcfromtimestamp(int(timestamp))
        rounded_minute = timestamp.minute // 10 * 10
        timestamp = timestamp.replace(minute=rounded_minute, second=0, microsecond=0)

        # First parsed row, store the initial record
        if not data_in:
            first_timestamp = timestamp
            data_in[timestamp] = Decimal(value_in)
            last_timestamp = timestamp
            continue

        # We've already had a transaction in this 10-minute period; add this one to it
        if timestamp in data_in:
            data_in[timestamp] += Decimal(value_in)
            last_timestamp = timestamp
            continue

        # Fill in 10-minute periods without transactions with the total from
        # the previous period (probably unnecessary, but avoids gaps)
        while last_timestamp + timedelta(minutes=10) < timestamp:
            data_in[last_timestamp + timedelta(minutes=10)] = data_in[last_timestamp]
            last_timestamp += timedelta(minutes=10)

        # And now add the new 10-minute period with the cumulative total
        data_in[timestamp] = data_in[last_timestamp] + Decimal(value_in)
        last_timestamp = timestamp

# This is where we'll gather the outgoing transaction data
data_out = OrderedDict()
# Start it off with 0 at the time of the first incoming transaction
data_out[first_timestamp] = Decimal('0.0')

# The reason I keep adding these silly *_timestamp variables is that I can't
# do data_in.keys()[0] or data_in.keys()[-1], and I don't want to keep casting
# the keys into lists.
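# At this point data_in maps each 10-minute bucket (a datetime) to the
# cumulative incoming total up to and including that bucket, with gaps filled
# in. A sketch of its shape (the values below are made up for illustration):
#
#   OrderedDict([(datetime(2017, 1, 1, 0, 0),  Decimal('1.5')),
#                (datetime(2017, 1, 1, 0, 10), Decimal('1.5')),
#                (datetime(2017, 1, 1, 0, 20), Decimal('2.25'))])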
last_out_timestamp = first_timestamp

with open(args.outgoing) as f:
    outgoing = csv.reader(f)
    # Skip CSV header row
    next(outgoing)
    for _, _, timestamp, _, _, _, _, value_in, value_out, _, _, _, _, _ in outgoing:
        # Sanity check, just in case: outgoing rows should carry no incoming value
        assert not float(value_in)

        # Round the timestamp down to the last 10-minute mark
        timestamp = datetime.utcfromtimestamp(int(timestamp))
        rounded_minute = timestamp.minute // 10 * 10
        timestamp = timestamp.replace(minute=rounded_minute, second=0, microsecond=0)

        # We've already had a transaction in this 10-minute period; add this one to it
        if timestamp in data_out:
            data_out[timestamp] += Decimal(value_out)
            last_out_timestamp = timestamp
            continue

        # Fill in 10-minute periods without transactions, as we did for incoming
        while last_out_timestamp + timedelta(minutes=10) < timestamp:
            data_out[last_out_timestamp + timedelta(minutes=10)] = data_out[last_out_timestamp]
            last_out_timestamp += timedelta(minutes=10)

        # And now add the new 10-minute period with the cumulative total
        data_out[timestamp] = data_out[last_out_timestamp] + Decimal(value_out)
        last_out_timestamp = timestamp

# If the outgoing transactions end more than 10 minutes before
# the incoming ones, fill in the empty periods at the end
while last_out_timestamp < last_timestamp:
    data_out[last_out_timestamp + timedelta(minutes=10)] = data_out[last_out_timestamp]
    last_out_timestamp += timedelta(minutes=10)

# Output our data in CSV format
for k, v in data_in.items():
    print('%s,%s,%s' % (k, v, data_out[k]))
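# A minimal usage sketch (the script and file names below are hypothetical,
# not from the original exports): pass the two CSV exports as positional
# arguments and redirect the output, e.g.
#
#   python cumulative_totals.py incoming.csv outgoing.csv > cumulative.csv
#
# Each output row is "<10-minute bucket>,<cumulative in>,<cumulative out>".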