# Load the stock feature table from CSV and split its columns into
# string-valued and numeric groups, keeping the numeric ones for analysis.
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sb

# Silence NumPy floating-point warnings for division by zero and invalid
# operations (e.g. 0/0) for the rest of the script.
np.seterr(divide='ignore', invalid='ignore')

# Quick way to test just a few column features
# stocks = pd.read_csv('supercolumns-elements-nasdaq-nyse-otcbb-general-UPDATE-2017-03-01.csv', usecols=range(1,16))

stocks = pd.read_csv('supercolumns-elements-nasdaq-nyse-otcbb-general-UPDATE-2017-03-01.csv')
print(stocks.head())

# Collect the names of columns whose second row holds a string.
# NOTE: only one row is sampled, so a column whose second row is missing
# (NaN) is treated as numeric even if its other rows are strings.
str_list = []
# DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
for colname, colvalue in stocks.items():
    # .iloc makes the "second row" lookup explicitly positional rather than
    # relying on the default integer index labels.
    if isinstance(colvalue.iloc[1], str):
        str_list.append(colname)

# Get to the numeric columns by inversion
# (Index.difference returns the remaining column names in sorted order).
num_list = stocks.columns.difference(str_list)
stocks_num = stocks[num_list]
print(stocks_num.head())