# Github Statistics per Bokeh Release

Visualize some Github statistics as a function of PyPi releases.

## Github Statistics

* [Github Commit Activity for the last 52 weeks](https://developer.github.com/v3/repos/statistics/#commit-activity)

## Bokeh Features

* [Basic Glyphs](http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#plotting-with-basic-glyphs)
* [Twin Axes](http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#id23)
* [Tab Panes](http://bokeh.pydata.org/en/latest/docs/user_guide/interaction.html#tab-panes)



In [1]:
from bokeh.plotting import figure, show
from bokeh.models import LinearAxis, Range1d
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import output_notebook
from bokeh.resources import CDN
import bokeh.palettes as palettes

output_notebook(resources=CDN)

In [9]:
show(Tabs(tabs=p['panel']))

In [2]:
import requests
import pandas as pd
import numpy as np

In [3]:
# Github API
activityUrl = "https://api.github.com/repos/bokeh/bokeh/stats/commit_activity"
activity = pd.read_json( activityUrl, convert_dates=['week'])
activity['cumsum'] = activity['total'].cumsum()
# PyPi API
downloadUrl = "https://pypi.python.org/pypi/bokeh/json"
response = requests.get( url = downloadUrl).json()

#### Munge PyPi data

Extract the major releases

In [4]:
release = []
for version in response['releases']:
    tmp = 0
    for dltype in response['releases'][version]:
        tmp += dltype['downloads']
    release.append({ 
        'date': dltype['upload_time'],
        'version': version,
        'count': tmp
    })
download = pd.DataFrame(release)
download['date'] = pd.to_datetime(download['date'])
download = download.sort('version')

majorRelease = download[ 
    download['version'].apply( lambda s: len(s.split('.'))==2  or \
                              s.split('.')[2] == '0')
]

#### Create the color palette to associate with each version

In [5]:
numPalette = 0
for index in range( len(majorRelease) ):
    releaseCycle = activity[ 
        activity['week'] >= majorRelease['date'].iloc[index]
    ]
    if index < len(majorRelease)-1:
        releaseCycle = releaseCycle[activity['week'] <= majorRelease['date'].iloc[index+1]]
    
    if len(releaseCycle) > 0:
        numPalette += 1
colors = []
colorShift = 1
for color in reversed(getattr(palettes,'Greens%i'%(numPalette+colorShift))):
    colors.append(color)



#### Initialize the Bokeh Figures

In [6]:
# Initialize the plot object
p={
    'figure':{
        'Commits to Date': figure(x_axis_type = "datetime", y_range=(0,2000)),
        'Commits per Week': figure(x_axis_type = "datetime", y_range=(0,2000))
    },
    'panel': []
}
# Add glyphs to the plot
p['figure']['Commits to Date'].y_range = Range1d( activity['total'].cumsum().min(), activity['total'].cumsum().max())
p['figure']['Commits per Week'].y_range = Range1d(activity['total'].min(),activity['total'].max())

##### Create the segments based on the versions

In [7]:
segmentCount = colorShift
for index in range( len(majorRelease) ):
    version = majorRelease['version'].iloc[index]
    releaseCycle = activity[ 
        activity['week'] >= majorRelease['date'].iloc[index]
    ]
    if index < len(majorRelease)-1:
        releaseCycle = releaseCycle[activity['week'] <= majorRelease['date'].iloc[index+1]]
    
    if len(releaseCycle) > 0:

        # Add the previous entry to make the plot continuous
        
        
        if releaseCycle.index[0] > 0:
            releaseCycle.loc[releaseCycle.index[0]-1] = activity.loc[releaseCycle.index.min()-1]
        releaseCycle = releaseCycle.sort('week')
        
        p['figure']['Commits to Date'].line( 
                x = releaseCycle['week'],
                y= releaseCycle['cumsum'],
                color=colors[segmentCount],
                line_width=10,
                alpha = .9,
                line_cap = 'round',
                legend = version
              )

        p['figure']['Commits per Week'].line( 
                x = releaseCycle['week'],
                y= releaseCycle['total'],
                color=colors[segmentCount],
                line_width=10,
                alpha = .9,
                line_cap = 'round'
              )

        p['figure']['Commits per Week'].patch( 
            x = pd.concat(
                [
                    pd.Series(releaseCycle['week'].iloc[0]), 
                    releaseCycle['week'],
                    pd.Series(releaseCycle['week'].iloc[-1])
                ],
                axis=0
            ),
            y= np.concatenate( ([0],releaseCycle['total'],[0])),
            color=colors[segmentCount],
            alpha = .5,
            legend = version
          )
        p['figure']['Commits to Date'].patch( 
            x = pd.concat(
                [
                    pd.Series(releaseCycle['week'].iloc[0]), 
                    releaseCycle['week'],
                    pd.Series(releaseCycle['week'].iloc[-1])
                ],
                axis=0
            ),
            y= np.concatenate( ([0],releaseCycle['cumsum'],[0])),
            color=colors[segmentCount],
            alpha = .5,
            legend = version         
          )
        segmentCount += 1

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


#### Create Tabs and Panels

In [8]:
for index,key in enumerate(p['figure']):
    p['figure'][key].xaxis.axis_label = 'Date'
    p['figure'][key].yaxis.axis_label = key
    p['figure'][key].xgrid.grid_line_color = None
    p['figure'][key].extra_y_ranges = {
        'download': Range1d(
            start = download['count'].min(),
            end = download['count'].max()
        )
    }
    p['figure'][key].legend.orientation = "top_left"
    p['panel'].append(Panel(child=p['figure'][key],title=key))
    

##### Plot