mdvsh · October 16, 2020 16:43 · Oct 16, 2020
diff --git a/mangla_learning.py b/mangla_learning.py
@@ -0,0 +1,33 @@
+"""Scrape finalist school names from the 'stripe' table in bhai.html to CSV."""
+import csv
+import re
+
+import pandas
+from bs4 import BeautifulSoup
+
+# Parse inside a context manager so the HTML file handle is always closed.
+with open('bhai.html', 'r') as src:
+    soup = BeautifulSoup(src, 'lxml')
+table = soup.find("table", attrs={'class': 'stripe'})
+
+# Column names come from the first header row, newlines and padding removed.
+heading = [th.text.replace('\n', '').strip()
+           for th in table.thead.find_all("tr")[0].find_all("th")]
+# One dict per body row; zip() stops at the shorter of cells and headings.
+table_data = [
+    {th: td.text.replace('\n', '').strip()
+     for td, th in zip(tr.find_all("td"), heading)}
+    for tr in table.tbody.find_all("tr")
+]
+
+schools = []
+for row in table_data:
+    if 'School' in row['Finalist Name(s)']:
+        s = re.sub(' +', ' ', row['Finalist Name(s)'])
+        # Keep the text between the first "(" and ")" and drop its first 8
+        # characters (presumably a "School: " label -- confirm against the page).
+        schools.append(s[s.find("(")+1:s.find(")")][8:])
+
+df = pandas.DataFrame(schools)
+print(df.head())  # head must be called; bare df.head printed the bound method
+df.to_csv('mangla_learning.csv')