Created
October 13, 2018 16:10
-
-
Save aaronkub/e0fd5583da22816cd040d8050b5b8b82 to your computer and use it in GitHub Desktop.
Revisions
-
aaronkub created this gist
Oct 13, 2018. There are no files selected for viewing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Learn the TF-IDF vocabulary/weights from the cleaned training reviews and
# vectorize them in one step; the fitted vectorizer is then reused unchanged
# on the cleaned test reviews.
tfidf_vectorizer = TfidfVectorizer()
X = tfidf_vectorizer.fit_transform(reviews_train_clean)
X_test = tfidf_vectorizer.transform(reviews_test_clean)

# Hold out 25% of the training matrix as a validation set for choosing C.
# NOTE(review): no random_state is passed, so the split (and therefore the
# scores printed below) may differ from run to run.
X_train, X_val, y_train, y_val = train_test_split(X, target, train_size=0.75)

# Sweep the regularization parameter C and report validation accuracy.
for c in (0.01, 0.05, 0.25, 0.5, 1):
    lr = LogisticRegression(C=c).fit(X_train, y_train)
    val_accuracy = accuracy_score(y_val, lr.predict(X_val))
    print("Accuracy for C=%s: %s" % (c, val_accuracy))

# Output recorded by the original author:
#     Accuracy for C=0.01: 0.79632
#     Accuracy for C=0.05: 0.83168
#     Accuracy for C=0.25: 0.86768
#     Accuracy for C=0.5: 0.8736
#     Accuracy for C=1: 0.88432

# Refit on the full training matrix with C=1 and score on the test matrix.
# NOTE(review): test predictions are compared against `target`, the same
# label vector used for training -- this presumes the test reviews carry
# labels in exactly the same order and number as the training reviews;
# confirm against where `target` is defined.
final_tfidf = LogisticRegression(C=1).fit(X, target)
test_accuracy = accuracy_score(target, final_tfidf.predict(X_test))
print("Final Accuracy: %s" % test_accuracy)

# Output recorded by the original author:
#     Final Accuracy: 0.882