DrOctogon · June 27, 2014 17:30 · Jul 5, 2013 · Jul 5, 2013 · Jul 5, 2013
diff --git a/prep-contacts-for-ponymailer b/prep-contacts-for-ponymailer
@@ -1,46 +1,66 @@
-# This represents a major refinement of the scripts to extract names and emails 
-# and as soon as I'm sure it is complete, then I will delete the corresponding
-# numbers from my GistBox - 07/05/2013
+# A simple Python script to extract names and emails from
+# a certain online directory
 
-import os
-import json
+import os, json
 from bs4 import BeautifulSoup
 
-# choose file and assign to infile variable
-infile = # path/to/infile
-outfile =# path/to/outfile
-
-file = open(infile, 'r')
-soup = BeautifulSoup(file)
-strongs = soup.select('strong')
-mailtos = soup.select('a[href^=mailto]')
-prenames = []
-names = []
-emails = []
-jsondump = []
-
-# Extract names
-for i in strongs:
-  for j in i:
-    prenames.append(j.string)
+#get a list of the files in the current directory
+inputfiles = os.listdir(os.getcwd())
+
+def postproc(inputfiles):
+
+#for every file in the directory
+  for i in inputfiles:
+
+# call preproc on the file; its outfile is named out<N>.txt, where N is the file's index in inputfiles
+	preproc(i, "out"+str(inputfiles.index(i))+".txt")
+
+def preproc(infile, outfile):
+
+	# open the infile for reading
+    file = open(infile, 'r')
+
+    # convert the infile to a BeautifulSoup object
+	soup = BeautifulSoup(file)
+
+	# find all <strong></strong> elements
+	strongs = soup.select('strong')
+
+	# find all mailto (email) elements
+	mailtos = soup.select('a[href^=mailto]')
+
+	# prep variables for subsequent stages in the process
+	prenames = []
+	names = []
+	emails = []
+	contactzip = []
+	jsondump = []
+
+	# Extract names
+	for i in strongs:
+  		for j in i:
+    		prenames.append(j.string)
 
-for i in prenames:
-  if prenames.index(i)%2 != 0:
-    if i.string != None:
-      if i != '\n':
-        names.append(i.string.encode('utf-8').strip())
+	for i in prenames:
+  		if prenames.index(i)%2 != 0:
+    		if i.string != None:
+      			if i != '\n':
+        			names.append(i.string.encode('utf-8').strip())
 
-# Extract emails        
-for i in mailtos:
-  if i.string != None:
-    emails.append(i.string.encode('utf-8').strip())
+	# Extract emails        
+	for i in mailtos:
+  		if i.string != None:
+    		emails.append(i.string.encode('utf-8').strip())
 
-# zip together names,emails into a list of lists
-jsondump = zip(names,emails)
+	# zip together emails,names into a list of (email, name) pairs
+	contactzip = zip(emails, names)
 
-# convert list of lists to json for processing by ponymailer
-jsondump = json.dumps(jsondump)
+	# convert list of lists to json for processing by ponymailer
+	jsondump = json.dumps(contactzip)
+
+	# write to file
+	with open(outfile, 'w') as file:
+  		file.write(jsondump)
 
-# write to file
-with open(outfile, 'w') as file:
-  file.write(jsondump)
+# run the script
+postproc(inputfiles)
diff --git a/prep-contacts-for-ponymailer b/prep-contacts-for-ponymailer
@@ -0,0 +1,46 @@
+# This represents a major refinement of the scripts to extract names and emails 
+# and as soon as I'm sure it is complete, then I will delete the corresponding
+# numbers from my GistBox - 07/05/2013
+
+import os
+import json
+from bs4 import BeautifulSoup
+
+# choose file and assign to infile variable
+infile = # path/to/infile
+outfile =# path/to/outfile
+
+file = open(infile, 'r')
+soup = BeautifulSoup(file)
+strongs = soup.select('strong')
+mailtos = soup.select('a[href^=mailto]')
+prenames = []
+names = []
+emails = []
+jsondump = []
+
+# Extract names
+for i in strongs:
+  for j in i:
+    prenames.append(j.string)
+
+for i in prenames:
+  if prenames.index(i)%2 != 0:
+    if i.string != None:
+      if i != '\n':
+        names.append(i.string.encode('utf-8').strip())
+
+# Extract emails        
+for i in mailtos:
+  if i.string != None:
+    emails.append(i.string.encode('utf-8').strip())
+
+# zip together names,emails into a list of lists
+jsondump = zip(names,emails)
+
+# convert list of lists to json for processing by ponymailer
+jsondump = json.dumps(jsondump)
+
+# write to file
+with open(outfile, 'w') as file:
+  file.write(jsondump)