sandeepkunkunuru · January 9, 2022 05:51 · Jan 9, 2022
diff --git a/match.py b/match.py
@@ -0,0 +1,54 @@
+
+"""Report words shared between a reference file and each file in a directory.
+
+Reads the reference file ``f1``, drops English stop words from it, and for
+every file directly inside ``directory`` appends one pipe-delimited row per
+shared word to ``outfile.csv``: the word, its count in ``f1``, its count in
+the other file, and the other file's path.
+"""
+
+import os
+from collections import Counter
+
+from nltk.corpus import stopwords
+
+sw = set(stopwords.words('english'))
+
+# NOTE(review): "<<>>" looks like a redacted placeholder -- set both values to
+# real paths before running.
+f1 = "<<>>"
+directory = "<<>>"
+out_path = 'outfile.csv'
+
+with open(f1, 'r') as file1:
+    words1 = file1.read().lower().split()
+
+# Stop words are removed from the reference side only; because rows are
+# limited to words present in both files, no stop word is ever reported.
+words1 = [word for word in words1 if word not in sw]
+# Count once up front instead of calling list.count() for every shared word.
+counts1 = Counter(words1)
+
+# Sorted so files are processed in a reproducible order.
+files = sorted(
+    f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))
+)
+
+# The output is opened in append mode, so write the header only when the file
+# is new or empty; otherwise every run would add another header row.
+write_header = not os.path.exists(out_path) or os.path.getsize(out_path) == 0
+
+with open(out_path, 'a') as output:
+    if write_header:
+        output.write('{}|{}|{}|{}\n'.format("word", "count_file_1", "count_file_2", "file_2"))
+
+    for filename in files:
+        f = os.path.join(directory, filename)
+
+        with open(f, 'r') as file2:
+            counts2 = Counter(file2.read().lower().split())
+
+        # Sorted so the row order does not vary between runs.
+        words = sorted(counts1.keys() & counts2.keys())
+
+        for word in words:
+            output.write('{}|{}|{}|{}\n'.format(word, counts1[word], counts2[word], f))
No results found