mapcloud · July 28, 2017 13:30 · May 8, 2014 · May 8, 2014 · May 8, 2014
diff --git a/remove-empty-columns.ipynb b/remove-empty-columns.ipynb
@@ -115,7 +115,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "Using the **`dropna()`** function with the **columns** axis we can drop any column where **all** the entries are **NaN** (missing values)."
+      "Using the [pandas.DataFrame.dropna()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.dropna.html) function with the **columns** axis we can drop any column where **all** the entries are **NaN** (missing values)."
      ]
     },
     {

diff --git a/remove-empty-columns.py b/remove-empty-columns.py
@@ -24,7 +24,7 @@
 
 # <markdowncell>
 
-# Using the **`dropna()`** function with the **columns** axis we can drop any column where **all** the entries are **NaN** (missing values).
+# Using the [pandas.DataFrame.dropna()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.dropna.html) function with the **columns** axis we can drop any column where **all** the entries are **NaN** (missing values).
 
 # <codecell>
 

diff --git a/remove-empty-columns.csv b/remove-empty-columns.csv
@@ -0,0 +1,6 @@
+foo,bar,baz
+a,,1
+b,,2
+c,,
+,,4
+e,,5
diff --git a/remove-empty-columns.ipynb b/remove-empty-columns.ipynb
@@ -0,0 +1,194 @@
+{
+ "metadata": {
+  "name": ""
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "To drop all empty columns (but still keeping the headers) using the Python [Pandas library](http://pandas.pydata.org/) we can use the following 4-line script to read in the csv file, drop the columns where **all** the elements are missing, and save the data to a new csv file."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from pandas.io.parsers import read_csv\n",
+      "data = read_csv('remove-empty-columns.csv')\n",
+      "filtered_data = data.dropna(axis='columns', how='all')\n",
+      "filtered_data.to_csv('empty-columns-removed.csv')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "As shown below, the sample data included in the csv file has 3 columns which contain missing values.\n",
+      "\n",
+      "The second column, labeled **bar**, is completely empty except the header; columns like this should be dropped. The other columns contain data, but should not be dropped even though they contain some missing values."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "data"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>foo</th>\n",
+        "      <th>bar</th>\n",
+        "      <th>baz</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>   a</td>\n",
+        "      <td>NaN</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>   b</td>\n",
+        "      <td>NaN</td>\n",
+        "      <td>  2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>   c</td>\n",
+        "      <td>NaN</td>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> NaN</td>\n",
+        "      <td>NaN</td>\n",
+        "      <td>  4</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>   e</td>\n",
+        "      <td>NaN</td>\n",
+        "      <td>  5</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>5 rows \u00d7 3 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 2,
+       "text": [
+        "   foo  bar  baz\n",
+        "0    a  NaN    1\n",
+        "1    b  NaN    2\n",
+        "2    c  NaN  NaN\n",
+        "3  NaN  NaN    4\n",
+        "4    e  NaN    5\n",
+        "\n",
+        "[5 rows x 3 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Using the **`dropna()`** function with the **columns** axis we can drop any column where **all** the entries are **NaN** (missing values)."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "filtered_data = data.dropna(axis='columns', how='all')\n",
+      "filtered_data"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>foo</th>\n",
+        "      <th>baz</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>   a</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>   b</td>\n",
+        "      <td>  2</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>   c</td>\n",
+        "      <td>NaN</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> NaN</td>\n",
+        "      <td>  4</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>   e</td>\n",
+        "      <td>  5</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>5 rows \u00d7 2 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 3,
+       "text": [
+        "   foo  baz\n",
+        "0    a    1\n",
+        "1    b    2\n",
+        "2    c  NaN\n",
+        "3  NaN    4\n",
+        "4    e    5\n",
+        "\n",
+        "[5 rows x 2 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 3
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
diff --git a/remove-empty-columns.py b/remove-empty-columns.py
@@ -1 +1,33 @@
-#
+# -*- coding: utf-8 -*-
+# <nbformat>3.0</nbformat>
+
+# <markdowncell>
+
+# To drop all empty columns (but still keeping the headers) using the Python [Pandas library](http://pandas.pydata.org/) we can use the following 4-line script to read in the csv file, drop the columns where **all** the elements are missing, and save the data to a new csv file.
+
+# <codecell>
+
+from pandas.io.parsers import read_csv
+data = read_csv('remove-empty-columns.csv')
+filtered_data = data.dropna(axis='columns', how='all')
+filtered_data.to_csv('empty-columns-removed.csv')
+
+# <markdowncell>
+
+# As shown below, the sample data included in the csv file has 3 columns which contain missing values.
+# 
+# The second column, labeled **bar**, is completely empty except the header; columns like this should be dropped. The other columns contain data, but should not be dropped even though they contain some missing values.
+
+# <codecell>
+
+data
+
+# <markdowncell>
+
+# Using the **`dropna()`** function with the **columns** axis we can drop any column where **all** the entries are **NaN** (missing values).
+
+# <codecell>
+
+filtered_data = data.dropna(axis='columns', how='all')
+filtered_data
+
diff --git a/remove-empty-columns.py b/remove-empty-columns.py
@@ -0,0 +1 @@
+#
No results found