thinkler · June 11, 2017 11:06 · Jun 11, 2017
diff --git a/PCA Diploma b/PCA Diploma
@@ -0,0 +1,100 @@
+"""PCA projection of the samples in a CSV file onto two components, plotted with matplotlib."""
+print(__doc__)
+import matplotlib.pyplot as plt
+
+import csv
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+
+def pca_ol(filename):
+
+  X = []
+  y = [0, 1, 2]
+
+  with open(filename) as csvfile:
+      readCSV = csv.reader(csvfile, delimiter=',')
+      for row in readCSV:
+          row = [float(i.replace(",",".")) for i in row]
+          X.append(row)
+
+  target_names = ["G1", "G2", "G3"]
+
+  pca = PCA(n_components=2)
+  X_r = pca.fit(X).transform(X)
+
+  # Percentage of variance explained for each components
+  print('explained variance ratio (first two components): %s'
+        % str(pca.explained_variance_ratio_))
+
+  plt.figure()
+  colors = ['navy', 'turquoise', 'darkorange']
+  lw = 2
+
+  print(X_r)
+
+
+  for color, i, target_name in zip(colors, [0, 1, 2], target_names):
+      plt.scatter(X_r[i, 0], X_r[i, 1], color=color, alpha=.8, lw=lw,
+                  label=target_name)
+
+  plt.legend(loc='best', shadow=False, scatterpoints=1)
+
+  plt.show()
+
+
+
+# pca_ol('tdgma1.csv')
+# pca_ol('tdgma2.csv')
+# pca_ol('tdgma3.csv')
+# pca_ol('tdgma4.csv')
+pca_ol('tdgma5.csv')
+# NOTE(review): this line and everything below repeat the script above verbatim - looks like a double paste; confirm and drop one copy.
+print(__doc__)
+import matplotlib.pyplot as plt
+
+import csv
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+
+def pca_ol(filename):
+
+  X = []
+  y = [0, 1, 2]
+
+  with open(filename) as csvfile:
+      readCSV = csv.reader(csvfile, delimiter=',')
+      for row in readCSV:
+          row = [float(i.replace(",",".")) for i in row]
+          X.append(row)
+
+  target_names = ["G1", "G2", "G3"]
+
+  pca = PCA(n_components=2)
+  X_r = pca.fit(X).transform(X)
+
+  # Percentage of variance explained for each components
+  print('explained variance ratio (first two components): %s'
+        % str(pca.explained_variance_ratio_))
+
+  plt.figure()
+  colors = ['navy', 'turquoise', 'darkorange']
+  lw = 2
+
+  print(X_r)
+
+
+  for color, i, target_name in zip(colors, [0, 1, 2], target_names):
+      plt.scatter(X_r[i, 0], X_r[i, 1], color=color, alpha=.8, lw=lw,
+                  label=target_name)
+
+  plt.legend(loc='best', shadow=False, scatterpoints=1)
+
+  plt.show()
+
+
+
+# pca_ol('tdgma1.csv')
+# pca_ol('tdgma2.csv')
+# pca_ol('tdgma3.csv')
+# pca_ol('tdgma4.csv')
+pca_ol('tdgma5.csv')