Skip to content

Instantly share code, notes, and snippets.

@dschien
Created July 6, 2015 16:30
Show Gist options
  • Select an option

  • Save dschien/66b8764c4ad1b7ed4f33 to your computer and use it in GitHub Desktop.

Select an option

Save dschien/66b8764c4ad1b7ed4f33 to your computer and use it in GitHub Desktop.

Revisions

  1. dschien created this gist Jul 6, 2015.
    157 changes: 157 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,157 @@
    {
    "cells": [
    {
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy as sp\n",
    "import statsmodels.api as sm\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.linear_model import LinearRegression\n",
    "%matplotlib inline"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 7,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": [
    "# Source: Boston data set from the R ISLR package, exported with\n",
    "# write.csv(Boston, \"Boston.csv\", col.names = FALSE)\n",
    "boston_df = pd.read_csv(filepath_or_buffer=\"../../r/Boston.csv\")"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 12,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "data": {
    "text/plain": [
    "(47.117263854857882,\n",
    " array([ -3.05335819e+09, 3.05335819e+09, 9.31299461e-02,\n",
    " -3.29341722e+00]))"
    ]
    },
    "execution_count": 12,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "# Fit medv ~ poly(lstat, 4) with scikit-learn, using explicit power columns of lstat.\n",
    "# Each column name states its exponent; the exponent passed to np.power must match it,\n",
    "# otherwise two columns are identical and the design matrix is perfectly collinear.\n",
    "# NOTE(review): lstat^3 was previously computed with exponent 4 (a duplicate of lstat^4).\n",
    "# Saved outputs showing coefficients of magnitude ~1e9 predate this fix; re-run to refresh.\n",
    "boston_df[\"lstat^2\"] = np.power(boston_df[\"lstat\"], 2)\n",
    "boston_df[\"lstat^3\"] = np.power(boston_df[\"lstat\"], 3)\n",
    "boston_df[\"lstat^4\"] = np.power(boston_df[\"lstat\"], 4)\n",
    "X = boston_df[[\"lstat^4\", \"lstat^3\", \"lstat^2\", \"lstat\"]]\n",
    "y = boston_df[\"medv\"]\n",
    "reg7 = LinearRegression()\n",
    "reg7.fit(X, y)\n",
    "# Display the fitted intercept and the slopes in the column order of X.\n",
    "(reg7.intercept_, reg7.coef_)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 14,
    "metadata": {
    "collapsed": false
    },
    "outputs": [],
    "source": [
    "# Refit the same regression with statsmodels for comparison with reg7.\n",
    "# sm.OLS does not include an intercept by default, so a constant column is added to X first.\n",
    "X = sm.add_constant(X)\n",
    "ols_model = sm.OLS(endog=y, exog=X)\n",
    "ols = ols_model.fit()"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 11,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "data": {
    "text/plain": [
    "False"
    ]
    },
    "execution_count": 11,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "# Check whether the statsmodels slopes (all params after the first entry) agree with\n",
    "# scikit-learn's coef_ to within an absolute tolerance of eps.\n",
    "eps = 1e-10\n",
    "coef_gap = np.abs(ols.params.values[1:] - reg7.coef_)\n",
    "(coef_gap < eps).all()"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 13,
    "metadata": {
    "collapsed": false
    },
    "outputs": [
    {
    "data": {
    "text/plain": [
    "array([ -1.17513710e-05, -1.17509020e-05, 9.23027375e-02,\n",
    " -3.27115207e+00])"
    ]
    },
    "execution_count": 13,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "# Parameter estimates from the statsmodels fit, skipping the first entry.\n",
    "ols.params.iloc[1:].values"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
    "collapsed": true
    },
    "outputs": [],
    "source": []
    }
    ],
    "metadata": {
    "kernelspec": {
    "display_name": "Python 2",
    "language": "python",
    "name": "python2"
    },
    "language_info": {
    "codemirror_mode": {
    "name": "ipython",
    "version": 2
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
    "version": "2.7.9"
    }
    },
    "nbformat": 4,
    "nbformat_minor": 0
    }