{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.sparse import coo_matrix\n",
    "from implicit.als import AlternatingLeastSquares\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Settings are read from the environment so that credentials never have to be\n",
    "# saved inside the notebook. The \"CHANGEME\" placeholders remain as fallbacks\n",
    "# for anyone who prefers to edit this cell directly.\n",
    "project_id = os.environ.get(\"GOOGLE_CLOUD_PROJECT\", \"CHANGEME\")\n",
    "github_user = os.environ.get(\"GITHUB_USER\", \"CHANGEME\")\n",
    "github_token = os.environ.get(\"GITHUB_TOKEN\", \"CHANGEME\")  # from https://github.com/settings/tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# HTTP Basic auth object built from the GitHub user and token configured above.\n",
    "github_auth = requests.auth.HTTPBasicAuth(github_user, github_token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requesting query... ok.\n",
      "Query running...\n",
      "Query done.\n",
      "Cache hit.\n",
      "\n",
      "Retrieving results...\n",
      "Got 78238 rows.\n",
      "\n",
      "Total time taken 6.51 s.\n",
      "Finished at 2017-06-24 09:08:00.\n"
     ]
    }
   ],
   "source": [
    "# Star (WatchEvent) events from githubarchive.month.201706, restricted to the\n",
    "# 1000 repositories with the most such events and to at most 10000 users who\n",
    "# have more than 10 and fewer than 100 events on those repositories.\n",
    "# users_stars sorts before its LIMIT so that the same users are picked on every\n",
    "# run if the cap is ever reached (LIMIT without ORDER BY returns arbitrary rows).\n",
    "query = \"\"\"\n",
    "WITH stars AS (\n",
    "    SELECT actor.login AS user, repo.name AS repo, created_at AS timestamp\n",
    "    FROM githubarchive.month.201706\n",
    "    WHERE type=\"WatchEvent\"\n",
    "),\n",
    "repositories_stars AS (\n",
    "    SELECT repo, COUNT(*) as c\n",
    "    FROM stars\n",
    "    GROUP BY repo\n",
    "    ORDER BY c DESC\n",
    "    LIMIT 1000\n",
    "),\n",
    "users_stars AS (\n",
    "    SELECT user, COUNT(*) as c\n",
    "    FROM stars\n",
    "    WHERE repo IN (SELECT repo FROM repositories_stars)\n",
    "    GROUP BY user\n",
    "    HAVING c > 10 AND c < 100\n",
    "    ORDER BY c DESC, user\n",
    "    LIMIT 10000\n",
    ")\n",
    "SELECT\n",
    "user, repo, timestamp\n",
    "FROM stars\n",
    "WHERE repo IN (SELECT repo FROM repositories_stars)\n",
    "AND user IN (SELECT user FROM users_stars)\n",
    "ORDER BY timestamp DESC\n",
    "\"\"\"\n",
    "\n",
    "# Run the query with the standard SQL dialect; the result is indexed by the\n",
    "# event timestamp and has one (user, repo) pair per row.\n",
    "data = pd.read_gbq(\n",
    "    query,\n",
    "    index_col=\"timestamp\",\n",
    "    dialect=\"standard\",\n",
    "    project_id=project_id,\n",
    ")"
   ]
  },
  {
"cell_type": "code", "execution_count": 136, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
| \n", " | user | \n", "repo | \n", "
|---|---|---|
| timestamp | \n", "\n", " | \n", " |
| 2017-06-23 23:57:04 | \n", "n3tn0de | \n", "webkul/coolhue | \n", "
| 2017-06-23 23:55:08 | \n", "psw0714 | \n", "justjavac/free-programming-books-zh_CN | \n", "
| 2017-06-23 23:54:36 | \n", "psw0714 | \n", "ecomfe/echarts | \n", "
| 2017-06-23 23:54:21 | \n", "psw0714 | \n", "tastejs/todomvc | \n", "
| 2017-06-23 23:54:14 | \n", "psw0714 | \n", "babel/babel | \n", "