Skip to content

Instantly share code, notes, and snippets.

@ajayverghese
Created February 14, 2013 18:50
Show Gist options
  • Save ajayverghese/4955210 to your computer and use it in GitHub Desktop.
Save ajayverghese/4955210 to your computer and use it in GitHub Desktop.

Revisions

  1. ajayverghese created this gist Feb 14, 2013.
    63 changes: 63 additions & 0 deletions graphite-diskusage.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,63 @@
    #!/usr/bin/python

    '''
    Calculate disk space required for given graphite retention policy and number of
    metrics
    '''

    #######################################

    # retention policy: comma-separated '<resolution>:<period>' archives, each
    # side written as an integer followed by a one-letter unit from time_map
    retentions = '10s:6h,1m:7d,10m:2y'

    # number of metrics to be stored
    num_metrics = 3000

    #######################################

    # time translations: length of each unit in seconds
    second = 1
    minute = 60 * second
    hour = 60 * minute
    day = 24 * hour
    week = 7 * day
    month = 30 * day  # not referenced by time_map; 'm' below means minute
    # whisper treats a year as 365 days; 52 weeks is only 364 days and
    # under-counts the datapoints of every 'y' retention
    year = 365 * day
    time_map = {
        's': second,
        'm': minute,
        'h': hour,
        'd': day,
        'w': week,
        'y': year,
    }

    datapoint_size = 12 # 12 bytes per datapoint

    def get_num_datapoints(retention):
        '''
        calculate number of datapoints based on the given retention policy string

        retention is a comma-separated list of '<resolution>:<period>' archives
        (e.g. '10s:6h,1m:7d'); each side is an integer followed by a one-letter
        unit that must be a key of time_map. One summary line is printed per
        archive and the sum of datapoints over all archives is returned.

        Raises KeyError for a unit that is not in time_map and ValueError for
        an archive that is not of the form '<resolution>:<period>' or whose
        count is not an integer.
        '''
        total_datapoints = 0
        # walk the argument that was passed in, not the module-level setting
        for policy in retention.split(','):
            resolution, period = policy.strip().split(':')
            # the last character is the unit, everything before it the count
            resolution_seconds = int(resolution[:-1]) * time_map[resolution[-1]]
            period_seconds = int(period[:-1]) * time_map[period[-1]]
            # floor division keeps the count an integer on Python 2 and 3
            points = period_seconds // resolution_seconds
            print('%s for %s => %s data points' % (resolution, period, points))
            total_datapoints += points
        return total_datapoints

    # datapoints needed by a single metric under the configured policy
    datapoints = get_num_datapoints(retentions)
    # bytes -> MB; the 1.0 factor forces float division on Python 2 as well
    disk_required_per_metric = (datapoints * datapoint_size * 1.0) / (1024 * 1024)
    total_disk_required = disk_required_per_metric * num_metrics

    # single-argument print(...) produces the same output on Python 2 and 3
    print('=' * 30)
    print('Number of metrics: %s' % num_metrics)
    print('Retention policy: %s' % retentions)
    print('Num datapoints per metric: %s' % datapoints)
    print('Disk space per metric: %s MB' % disk_required_per_metric)
    print('Total disk space for %s metrics: %s MB' % (num_metrics, total_disk_required))
    print('=' * 30)