Skip to content

Instantly share code, notes, and snippets.

@oddskool
Created September 10, 2013 07:00
Show Gist options
  • Select an option

  • Save oddskool/6505894 to your computer and use it in GitHub Desktop.

Select an option

Save oddskool/6505894 to your computer and use it in GitHub Desktop.

Revisions

  1. oddskool created this gist Sep 10, 2013.
    62 changes: 62 additions & 0 deletions parse_aws_s3_billing.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,62 @@
    # -*- coding:utf-8 -*-
    '''
    Simplistic script to parse the detailed AWS billing CSV file.
    Script displays cost of S3 operations broken down per region, bucket and usage
    type (either storage or network). It also sums up the amount of storage used per bucket.
    Entries whose total cost does not exceed $1 are omitted from the output.
    See http://docs.aws.amazon.com/awsaccountbilling/latest/about/programaccess.html for
    how to set up programmatic access to your billing.
    Should be simple enough to enhance this script and use it for other AWS resources
    (EC2, EMR, etc)
    @author: @oddskool <https://github.com/oddskool>
    @license: BSD 3 clauses
    '''

    import sys
    import csv
    from collections import defaultdict

    def add_type(d):
        '''
        Give the numeric columns of one billing row a numeric type.

        Rows whose 'RecordType' equals 'UsageQuantity' are rejected (None is
        returned and the row is left untouched). For every other row, 'Cost'
        and 'UsageQuantity' are converted to float and the account/invoice
        bookkeeping columns are removed.

        The row is modified in place and the very same dict is returned.
        A missing column raises KeyError; a non-numeric 'Cost' or
        'UsageQuantity' raises ValueError.
        '''
        if d['RecordType'] != 'UsageQuantity':
            # Numeric columns arrive as text from the CSV reader.
            d['Cost'] = float(d['Cost'])
            d['UsageQuantity'] = float(d['UsageQuantity'])
            # Columns that play no part in the per-resource report.
            unwanted = ('LinkedAccountId', 'InvoiceID', 'RecordType', 'RecordId',
                        'PayerAccountId', 'SubscriptionId')
            for column in unwanted:
                d.pop(column)
            return d
        return None

    def parse(stats, d):
        '''
        Fold one billing row into the running per-resource totals.

        The row is first passed through add_type(); rows it rejects, and rows
        whose 'ProductName' is not 'Amazon Simple Storage Service', are
        ignored. Otherwise the row's 'Cost' and 'UsageQuantity' are added to
        the entry of `stats` keyed by "<zone> * <resource id> * <usage type>",
        where an empty 'AvailabilityZone' is shown as 'N/A'.

        `stats` is updated in place; nothing is returned.
        '''
        record = add_type(d)
        if record and record['ProductName'] == 'Amazon Simple Storage Service':
            zone = record['AvailabilityZone'] or 'N/A'
            key = ' * '.join((zone, record['ResourceId'], record['UsageType']))
            totals = stats[key]
            totals['Cost'] += record['Cost']
            totals['UsageQuantity'] += record['UsageQuantity']

    if __name__ == '__main__':
        # Usage: parse_aws_s3_billing.py [billing.csv]
        # Without a path argument the CSV is read from standard input.
        fd = open(sys.argv[1]) if len(sys.argv) > 1 else sys.stdin
        # Maps a resource key to its running 'Cost' / 'UsageQuantity' totals;
        # both totals start at 0 the first time a key is seen.
        stats = defaultdict(lambda: defaultdict(int))
        try:
            reader = csv.reader(fd, delimiter=',', quotechar='"')
            legend = None
            for row in reader:
                if not legend:
                    # The first non-empty row holds the column names.
                    legend = row
                    continue
                d = dict(zip(legend, row))
                try:
                    parse(stats, d)
                except Exception as e:
                    # Best effort: report the offending row and keep going so
                    # one malformed line does not abort the whole report.
                    print(e)
                    print(row)
                    print(d)
        finally:
            # Close the file we opened ourselves, but never the shared stdin.
            if fd is not sys.stdin:
                fd.close()
        # Keep only resources costing more than $1, most expensive first.
        data = [(resource, cost_usage) for resource, cost_usage in stats.items()
                if cost_usage['Cost'] > 1.0]
        data.sort(key=lambda x: x[-1]['Cost'], reverse=True)
        for resource, cost_usage in data:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("%50s : $%.2f - %.2f GB" % (resource, cost_usage['Cost'],
                                              cost_usage['UsageQuantity']))