# Principal component analysis of the numeric columns of ``data.csv``.
#
# Steps: load the CSV, take columns 1..6, standardize them, fit a
# 2-component PCA, print the components and the projected coordinates,
# and plot the cumulative percentage of variance explained.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale

# Equivalent of the ``%matplotlib inline`` notebook magic. ``get_ipython``
# only exists when running under IPython/Jupyter; as a plain script the
# NameError is ignored and matplotlib's default backend is used.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Load data set
data = pd.read_csv('data.csv')

# Convert it to a numpy array, keeping columns 1 through 6 (0-based, end
# exclusive).
# NOTE(review): column 0 is skipped -- presumably an id/label column; confirm
# against the layout of data.csv.
X = data.values[:, 1:7]
print('Raw data:\n', X, '\n')

# Scaling the values: standardize each column (zero mean, unit variance) so
# that no single feature dominates the principal components.
X = scale(X)

pca = PCA(n_components=2)
pca.fit(X)
print('Components:\n', pca.components_, '\n')

# Project the standardized samples onto the two principal components.
newCoords = pca.transform(X)
print('Transformed Coordinates:\n', newCoords, '\n')

# The amount of variance that each PC explains (fractions, one per component).
var = pca.explained_variance_ratio_

# Cumulative variance explained, in percent. Each ratio is rounded to four
# decimals before accumulation, matching the original computation.
var1 = np.cumsum(np.round(var, decimals=4) * 100)

plt.plot(var1)