{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Numerical / tabular stack.\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Ignore every warning raised from this point on (including by the imports below).\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Render matplotlib figures inline in the cell output.\n",
    "%matplotlib inline\n",
    "\n",
    "# NOTE(review): wildcard import -- the catboost names used by later cells are not\n",
    "# visible from here; prefer explicit imports once they are confirmed. Imports that\n",
    "# follow rebind any same-named wildcard export, so keep this line ahead of them.\n",
    "from catboost import *\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import metrics\n",
    "from sklearn.model_selection import train_test_split\n",
    "import shap\n",
    "from time import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Optional setup, left disabled: uncomment to install plotly into the kernel's environment.\n",
    "# %pip install plotly"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Loading Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(32561, 15)\n"
     ]
    }
   ],
   "source": [
    "# Column labels handed to read_csv through `names`, in column order.\n",
    "header = [\n",
    "    \"age\",\n",
    "    \"workclass\",\n",
    "    \"fnlwgt\",\n",
    "    \"education\",\n",
    "    \"education-num\",\n",
    "    \"marital-status\",\n",
    "    \"occupation\",\n",
    "    \"relationship\",\n",
    "    \"race\",\n",
    "    \"sex\",\n",
    "    \"capital-gain\",\n",
    "    \"capital-loss\",\n",
    "    \"hours-per-week\",\n",
    "    \"native-country\",\n",
    "    \"income\",\n",
    "]\n",
    "\n",
    "# index_col=False stops pandas from using the first column as the index.\n",
    "# NOTE(review): if the file separates fields with a comma plus a space, string values keep\n",
    "# a leading space unless skipinitialspace=True is passed -- confirm before comparing values.\n",
    "train = pd.read_csv(\n",
    "    \"adult_data\",\n",
    "    names=header,\n",
    "    index_col=False,\n",
    ")\n",
    "\n",
    "# Quick sanity check: (rows, columns) of the loaded frame.\n",
    "print(train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
| \n", " | age | \n", "workclass | \n", "fnlwgt | \n", "education | \n", "education-num | \n", "marital-status | \n", "occupation | \n", "relationship | \n", "race | \n", "sex | \n", "capital-gain | \n", "capital-loss | \n", "hours-per-week | \n", "native-country | \n", "income | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "39 | \n", "State-gov | \n", "77516 | \n", "Bachelors | \n", "13 | \n", "Never-married | \n", "Adm-clerical | \n", "Not-in-family | \n", "White | \n", "Male | \n", "2174 | \n", "0 | \n", "40 | \n", "United-States | \n", "<=50K | \n", "
| 1 | \n", "50 | \n", "Self-emp-not-inc | \n", "83311 | \n", "Bachelors | \n", "13 | \n", "Married-civ-spouse | \n", "Exec-managerial | \n", "Husband | \n", "White | \n", "Male | \n", "0 | \n", "0 | \n", "13 | \n", "United-States | \n", "<=50K | \n", "
| 2 | \n", "38 | \n", "Private | \n", "215646 | \n", "HS-grad | \n", "9 | \n", "Divorced | \n", "Handlers-cleaners | \n", "Not-in-family | \n", "White | \n", "Male | \n", "0 | \n", "0 | \n", "40 | \n", "United-States | \n", "<=50K | \n", "