{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GitHub Statistics per Bokeh Release\n",
    "\n",
    "Visualize some GitHub statistics as a function of PyPI releases.\n",
    "\n",
    "## GitHub Statistics\n",
    "\n",
    "* [GitHub Commit Activity for the last 52 weeks](https://developer.github.com/v3/repos/statistics/#commit-activity)\n",
    "\n",
    "## Bokeh Features\n",
    "\n",
    "* [Basic Glyphs](http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#plotting-with-basic-glyphs)\n",
    "* [Twin Axes](http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#id23)\n",
    "* [Tab Panes](http://bokeh.pydata.org/en/latest/docs/user_guide/interaction.html#tab-panes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests\n",
    "\n",
    "import bokeh.palettes as palettes\n",
    "from bokeh.io import output_notebook\n",
    "from bokeh.models import LinearAxis, Range1d\n",
    "from bokeh.models.widgets import Panel, Tabs\n",
    "from bokeh.plotting import figure, show\n",
    "from bokeh.resources import CDN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load BokehJS from the CDN so that figures render inline\n",
    "output_notebook(resources=CDN)\n",
    "\n",
    "# Github API\n",
    "activityUrl = 'https://api.github.com/repos/bokeh/bokeh/stats/commit_activity'\n",
    "# PyPi API\n",
    "downloadUrl = 'https://pypi.org/pypi/bokeh/json'\n",
    "# Number of light shades skipped at the start of the color palette\n",
    "colorShift = 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Fetch the GitHub and PyPI data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetchJson(url, timeout=30):\n",
    "    \"\"\"Return the decoded JSON body served at `url`.\n",
    "\n",
    "    Raises requests.HTTPError on a 4xx/5xx reply and RuntimeError on HTTP 202,\n",
    "    which GitHub sends while it is still computing repository statistics.\n",
    "    \"\"\"\n",
    "    reply = requests.get(url, timeout=timeout)\n",
    "    reply.raise_for_status()\n",
    "    if reply.status_code == 202:\n",
    "        raise RuntimeError('%s is still being prepared (HTTP 202); re-run this cell in a moment' % url)\n",
    "    return reply.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per week; `week` arrives as a Unix timestamp in seconds\n",
    "activity = pd.DataFrame(fetchJson(activityUrl))\n",
    "activity['week'] = pd.to_datetime(activity['week'], unit='s')\n",
    "activity['cumsum'] = activity['total'].cumsum()\n",
    "\n",
    "response = fetchJson(downloadUrl)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Munge PyPI data\n",
    "\n",
    "Extract the major releases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def isMajorRelease(version):\n",
    "    \"\"\"Return True for version strings shaped like 'X.Y' or 'X.Y.0'.\"\"\"\n",
    "    parts = version.split('.')\n",
    "    return len(parts) == 2 or (len(parts) > 2 and parts[2] == '0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "release = []\n",
    "for version, files in response['releases'].items():\n",
    "    if not files:\n",
    "        # A release without uploaded files has neither a date nor downloads\n",
    "        continue\n",
    "    release.append({\n",
    "        # ISO timestamps sort as text, so min() is the first upload\n",
    "        'date': min(item['upload_time'] for item in files),\n",
    "        'version': version,\n",
    "        'count': sum(item.get('downloads', 0) for item in files)\n",
    "    })\n",
    "download = pd.DataFrame(release)\n",
    "download['date'] = pd.to_datetime(download['date'])\n",
    "# Order by date: sorting the version strings would put '0.10.0' before '0.9.0'\n",
    "download = download.sort_values('date').reset_index(drop=True)\n",
    "\n",
    "majorRelease = download[download['version'].apply(isMajorRelease)]\n",
    "assert not majorRelease.empty, 'no major release found in the PyPI data'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create the color palette to associate with each version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def releaseCycles(activity, majorRelease):\n",
    "    \"\"\"Yield (version, weeks) for each major release that has commit activity.\n",
    "\n",
    "    `weeks` holds the rows of `activity` dated from the release up to the next\n",
    "    major release (both inclusive), plus the row just before them so that\n",
    "    consecutive segments join up when plotted.\n",
    "\n",
    "    Assumes `activity` keeps its default 0..n-1 index in chronological order\n",
    "    and that `majorRelease` is sorted by date.\n",
    "    \"\"\"\n",
    "    versions = majorRelease['version'].tolist()\n",
    "    dates = majorRelease['date'].tolist()\n",
    "    for index, (version, start) in enumerate(zip(versions, dates)):\n",
    "        inCycle = activity['week'] >= start\n",
    "        if index < len(dates) - 1:\n",
    "            inCycle = inCycle & (activity['week'] <= dates[index + 1])\n",
    "        if not inCycle.any():\n",
    "            continue\n",
    "        first = activity.index[inCycle.values][0]\n",
    "        if first > 0:\n",
    "            # Add the previous entry to make the plot continuous\n",
    "            inCycle.loc[first - 1] = True\n",
    "        yield version, activity[inCycle].sort_values('week')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cycles = list(releaseCycles(activity, majorRelease))\n",
    "numPalette = len(cycles)\n",
    "# Only Greens3 ... Greens9 exist, so keep the requested size within that range\n",
    "paletteSize = min(max(numPalette + colorShift, 3), 9)\n",
    "colors = list(reversed(getattr(palettes, 'Greens%i' % paletteSize)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Initialize the Bokeh Figures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# (tab title, column of `activity` drawn on that tab), in tab order\n",
    "plotColumns = [('Commits to Date', 'cumsum'), ('Commits per Week', 'total')]\n",
    "\n",
    "# Initialize the plot object\n",
    "p = {\n",
    "    'figure': {\n",
    "        title: figure(\n",
    "            x_axis_type='datetime',\n",
    "            y_range=Range1d(activity[column].min(), activity[column].max())\n",
    "        )\n",
    "        for title, column in plotColumns\n",
    "    },\n",
    "    'panel': []\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Create the segments based on the versions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for segment, (version, releaseCycle) in enumerate(cycles, start=colorShift):\n",
    "    # Wrap around rather than fail when there are more cycles than shades\n",
    "    color = colors[segment % len(colors)]\n",
    "    weeks = releaseCycle['week']\n",
    "    # Outline of the filled area: repeat the first and last week so the\n",
    "    # polygon can drop to zero at both ends of the segment\n",
    "    patchX = pd.concat([weeks.iloc[:1], weeks, weeks.iloc[-1:]], axis=0)\n",
    "\n",
    "    for title, column in plotColumns:\n",
    "        plot = p['figure'][title]\n",
    "        plot.line(\n",
    "            x=weeks,\n",
    "            y=releaseCycle[column],\n",
    "            color=color,\n",
    "            line_width=10,\n",
    "            alpha=.9,\n",
    "            line_cap='round',\n",
    "            legend=version\n",
    "        )\n",
    "        plot.patch(\n",
    "            x=patchX,\n",
    "            y=np.concatenate(([0], releaseCycle[column], [0])),\n",
    "            color=color,\n",
    "            alpha=.5,\n",
    "            legend=version\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Tabs and Panels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebuild the list on every run so that re-running this cell adds no duplicate tabs\n",
    "p['panel'] = []\n",
    "for title, column in plotColumns:\n",
    "    plot = p['figure'][title]\n",
    "    plot.xaxis.axis_label = 'Date'\n",
    "    plot.yaxis.axis_label = title\n",
    "    plot.xgrid.grid_line_color = None\n",
    "    # Secondary range spanning the PyPI download counts; nothing in this\n",
    "    # notebook is drawn against it yet\n",
    "    plot.extra_y_ranges = {\n",
    "        'download': Range1d(\n",
    "            start=download['count'].min(),\n",
    "            end=download['count'].max()\n",
    "        )\n",
    "    }\n",
    "    plot.legend.location = 'top_left'\n",
    "    p['panel'].append(Panel(child=plot, title=title))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "show(Tabs(tabs=p['panel']))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}