GeorgeSeif · December 28, 2019 16:48 · Dec 28, 2019 · Dec 28, 2019 · Dec 28, 2019 · Dec 28, 2019
diff --git a/scikit_learn_1.py b/scikit_learn_1.py
@@ -15,7 +15,8 @@ def get_tf_idf(vectorizer):
         "a particular document"
 doc_2 = "The TF-IDF is perfectly balanced, considering both local and global " \
         "levels of statistics for the target word."
-doc_3
+doc_3 = "Words that occur more frequently in a document are weighted higher, " \
+        "but only if they're more rare within the whole document."
 documents_list = [doc_1, doc_2, doc_3]
 
 vectors = vectorizer.fit_transform(documents_list)

diff --git a/scikit_learn_1.py b/scikit_learn_1.py
@@ -13,7 +13,8 @@ def get_tf_idf(vectorizer):
 
 doc_1 = "TF-IDF uses statistics to measure how important a word is to " \
         "a particular document"
-doc_2
+doc_2 = "The TF-IDF is perfectly balanced, considering both local and global " \
+        "levels of statistics for the target word."
 doc_3
 documents_list = [doc_1, doc_2, doc_3]
 

diff --git a/scikit_learn_1.py b/scikit_learn_1.py
@@ -11,7 +11,8 @@ def get_tf_idf(vectorizer):
 
 vectorizer = TfidfVectorizer()
 
-doc_1 = "TF-IDF uses statistics to measure how important a word is to a particular document"
+doc_1 = "TF-IDF uses statistics to measure how important a word is to " \
+        "a particular document"
 doc_2
 doc_3
 documents_list = [doc_1, doc_2, doc_3]

diff --git a/scikit_learn_1.py b/scikit_learn_1.py
@@ -11,7 +11,7 @@ def get_tf_idf(vectorizer):
 
 vectorizer = TfidfVectorizer()
 
-doc_1 = 
+doc_1 = "TF-IDF uses statistics to measure how important a word is to a particular document"
 doc_2
 doc_3
 documents_list = [doc_1, doc_2, doc_3]

diff --git a/scikit_learn_1.py b/scikit_learn_1.py
@@ -11,7 +11,10 @@ def get_tf_idf(vectorizer):
 
 vectorizer = TfidfVectorizer()
 
-documents_list = []
+doc_1 = 
+doc_2
+doc_3
+documents_list = [doc_1, doc_2, doc_3]
 
 vectors = vectorizer.fit_transform(documents_list)
 

diff --git a/scikit_learn_1.py b/scikit_learn_1.py
@@ -0,0 +1,21 @@
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+def get_tf_idf(vectorizer):  # Build a pandas DataFrame of the TF-IDF matrix, one column per vectorizer feature name.
+  feature_names = vectorizer.get_feature_names()  # NOTE(review): newer scikit-learn releases expose get_feature_names_out() instead - confirm installed version
+  dense_vec = vectors.todense()  # NOTE(review): `vectors` is not a parameter; it is the module-level result of fit_transform assigned later in this script
+  dense_list = dense_vec.tolist()  # convert the dense matrix to nested Python lists for the DataFrame constructor
+  tfidf_data = pd.DataFrame(dense_list, columns=feature_names)  # presumably one row per input document - verify against fit_transform input
+  return tfidf_data
+
+
+vectorizer = TfidfVectorizer()
+
+documents_list = []
+
+vectors = vectorizer.fit_transform(documents_list)
+
+tfidf_data = get_tf_idf(vectorizer)
+
+print(tfidf_data)
+# Prints the TF-IDF data for all words across all documents
No results found