import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer


def get_tf_idf(vectorizer, tfidf_vectors=None):
    """Return the TF-IDF scores of a fitted vectorizer as a DataFrame.

    Parameters
    ----------
    vectorizer : TfidfVectorizer
        A *fitted* vectorizer; its vocabulary supplies the column names.
    tfidf_vectors : scipy sparse matrix, optional
        The document-term matrix produced by ``vectorizer.fit_transform``.
        Defaults to the module-level ``vectors`` global, preserving the
        original implicit-global behavior for existing callers.

    Returns
    -------
    pandas.DataFrame
        One row per document, one column per vocabulary term.
    """
    if tfidf_vectors is None:
        # Backward compatibility: the original implementation read the
        # module-level `vectors` global implicitly.
        tfidf_vectors = vectors
    # scikit-learn 1.0 deprecated get_feature_names() and 1.2 removed it;
    # prefer the replacement API, fall back for old installations.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    dense_rows = tfidf_vectors.todense().tolist()
    return pd.DataFrame(dense_rows, columns=feature_names)


doc_1 = (
    "TF-IDF uses statistics to measure how important a word is to "
    "a particular document"
)
doc_2 = (
    "The TF-IDF is perfectly balanced, considering both local and global "
    "levels of statistics for the target word."
)
doc_3 = (
    "Words that occur more frequently in a document are weighted higher, "
    "but only if they're more rare within the whole document."
)
documents_list = [doc_1, doc_2, doc_3]

vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(documents_list)

# Pass the document-term matrix explicitly instead of relying on the global.
tfidf_data = get_tf_idf(vectorizer, vectors)
print(tfidf_data)  # Prints the TF-IDF data for all words across all documents