from bs4 import BeautifulSoup
# Read the whole page up front inside a context manager so the file handle
# is closed before parsing; BeautifulSoup accepts the markup as a string.
with open('bhai.html', 'r') as html_file:
    src = html_file.read()
import re, csv, pandas
# Parse the loaded markup and locate the table carrying the "stripe" class.
soup = BeautifulSoup(src, 'lxml')
data = {}
table = soup.find("table", attrs={'class':'stripe'})

# Column titles are the <th> cells of the first header row, with newlines
# removed and surrounding whitespace trimmed.
table_head = table.thead.find_all("tr")
heading = [
    header_cell.text.replace('\n', '').strip()
    for header_cell in table_head[0].find_all("th")
]

# One dict per body row, mapping column title -> cleaned cell text.
# zip() stops at the shorter side, so a row with fewer cells than headings
# simply yields a dict with fewer keys.
table_data = [
    {
        title: cell.text.replace('\n', '').strip()
        for cell, title in zip(body_row.find_all("td"), heading)
    }
    for body_row in table.tbody.find_all("tr")
]
# print(table_data[1])
# Collect the parenthesised school text from each finalist entry.
schools = []
for row in table_data:
    # Row dicts are built by zipping cells against headings, so a row with
    # too few cells has no 'Finalist Name(s)' key at all. Use .get so such
    # rows are skipped instead of aborting the run with a KeyError.
    finalist = row.get('Finalist Name(s)', '')
    if 'School' not in finalist:
        continue
    # Collapse runs of spaces so the fixed-width slice below lines up.
    s = re.sub(' +', ' ', finalist)
    # Keep the text between the first "(" and the first ")", then drop its
    # first 8 characters (presumably a "School: "-style label - TODO confirm).
    schools.append(s[s.find("(")+1:s.find(")")][8:])
# print(schools[7])
# print(schools)

# Single-column DataFrame holding the extracted school names.
df = pandas.DataFrame(schools)
# head is a method: call it so only the first rows are printed, rather than
# the repr of the bound method object.
print(df.head())

# with open('mangla_learning_schools.csv', 'w', newline="\n") as src:
#     writer = csv.writer(src, delimiter=',')
#     writer.writerow(schools)

# Write the school names to a CSV file (relative path, so it lands in the
# current working directory), using pandas' default to_csv options.
df.to_csv('mangla_learning.csv')