Skip to content

Instantly share code, notes, and snippets.

@thinkler
Created June 11, 2017 11:06
Show Gist options
  • Save thinkler/f1a2493fdb4f0aded58024e87e2b51a9 to your computer and use it in GitHub Desktop.

Revisions

  1. thinkler created this gist Jun 11, 2017.
    100 changes: 100 additions & 0 deletions PCA Diploma
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,100 @@
    # Echo the module docstring. NOTE(review): no module docstring is visible
    # above this statement, so this presumably prints "None" — confirm whether
    # a docstring was meant to be added or this line should go.
    print(__doc__)

    import matplotlib.pyplot as plt

    import csv
    from sklearn.decomposition import PCA
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    def _read_numeric_rows(filename):
        """Return the non-empty rows of the CSV file *filename* as lists of floats."""
        # newline='' lets the csv module do its own line-ending handling.
        with open(filename, newline='') as csvfile:
            return [
                # A quoted field may use a decimal comma ("1,5"); normalize it
                # to a dot so float() accepts it.
                [float(cell.replace(",", ".")) for cell in row]
                for row in csv.reader(csvfile, delimiter=',')
                if row  # csv.reader yields [] for blank lines; skip them
            ]


    def pca_ol(filename):
        """Project the rows of a CSV file onto two principal components and plot them.

        Every non-empty row of *filename* is parsed as a list of floats and the
        resulting table is fitted with a 2-component PCA. The explained variance
        ratio and the projected data are printed, and up to the first three
        projected rows are drawn as single scatter points labelled "G1", "G2"
        and "G3". The figure is displayed with ``plt.show()``.

        Args:
            filename: Path of the comma-delimited file to read.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a cell cannot be converted to float.
        """
        X = _read_numeric_rows(filename)

        target_names = ["G1", "G2", "G3"]
        colors = ['navy', 'turquoise', 'darkorange']
        lw = 2

        pca = PCA(n_components=2)
        X_r = pca.fit(X).transform(X)

        # Percentage of variance explained for each components
        print('explained variance ratio (first two components): %s'
              % str(pca.explained_variance_ratio_))

        plt.figure()

        print(X_r)

        # Iterating over the projected rows directly (instead of indexing rows
        # 0, 1, 2) plots the same first three points but does not raise
        # IndexError when the projection has fewer than three rows.
        for color, point, target_name in zip(colors, X_r, target_names):
            plt.scatter(point[0], point[1], color=color, alpha=.8, lw=lw,
                        label=target_name)

        plt.legend(loc='best', shadow=False, scatterpoints=1)

        plt.show()



    # Script driver: run the PCA plot for one input file at a time. The other
    # input files are kept commented out; uncomment the one to analyze.
    # pca_ol('tdgma1.csv')
    # pca_ol('tdgma2.csv')
    # pca_ol('tdgma3.csv')
    # pca_ol('tdgma4.csv')
    pca_ol('tdgma5.csv')
    # NOTE(review): from this line on the file repeats the script above
    # (imports, pca_ol definition and driver call) — looks like an accidental
    # double paste; confirm and remove one copy.
    print(__doc__)

    import matplotlib.pyplot as plt

    import csv
    from sklearn.decomposition import PCA
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    def _read_numeric_rows(filename):
        """Return the non-empty rows of the CSV file *filename* as lists of floats."""
        # newline='' lets the csv module do its own line-ending handling.
        with open(filename, newline='') as csvfile:
            return [
                # A quoted field may use a decimal comma ("1,5"); normalize it
                # to a dot so float() accepts it.
                [float(cell.replace(",", ".")) for cell in row]
                for row in csv.reader(csvfile, delimiter=',')
                if row  # csv.reader yields [] for blank lines; skip them
            ]


    def pca_ol(filename):
        """Project the rows of a CSV file onto two principal components and plot them.

        Every non-empty row of *filename* is parsed as a list of floats and the
        resulting table is fitted with a 2-component PCA. The explained variance
        ratio and the projected data are printed, and up to the first three
        projected rows are drawn as single scatter points labelled "G1", "G2"
        and "G3". The figure is displayed with ``plt.show()``.

        Args:
            filename: Path of the comma-delimited file to read.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a cell cannot be converted to float.
        """
        X = _read_numeric_rows(filename)

        target_names = ["G1", "G2", "G3"]
        colors = ['navy', 'turquoise', 'darkorange']
        lw = 2

        pca = PCA(n_components=2)
        X_r = pca.fit(X).transform(X)

        # Percentage of variance explained for each components
        print('explained variance ratio (first two components): %s'
              % str(pca.explained_variance_ratio_))

        plt.figure()

        print(X_r)

        # Iterating over the projected rows directly (instead of indexing rows
        # 0, 1, 2) plots the same first three points but does not raise
        # IndexError when the projection has fewer than three rows.
        for color, point, target_name in zip(colors, X_r, target_names):
            plt.scatter(point[0], point[1], color=color, alpha=.8, lw=lw,
                        label=target_name)

        plt.legend(loc='best', shadow=False, scatterpoints=1)

        plt.show()



    # Script driver: run the PCA plot for one input file at a time. The other
    # input files are kept commented out; uncomment the one to analyze.
    # NOTE(review): 'tdgma5.csv' is also processed by an identical call earlier
    # in the file, so the plot is produced twice per run — confirm intent.
    # pca_ol('tdgma1.csv')
    # pca_ol('tdgma2.csv')
    # pca_ol('tdgma3.csv')
    # pca_ol('tdgma4.csv')
    pca_ol('tdgma5.csv')