"""Extract school names from the finalist table in ``bhai.html``.

The script parses ``bhai.html`` with the ``lxml`` parser, locates the
``<table class="stripe">`` element, maps every body row onto the column
headings found in the first header row, and then pulls the parenthesised
part out of each ``Finalist Name(s)`` cell that mentions ``School``.
The collected values are written to ``mangla_learning.csv``.
"""
import csv
import re

import pandas
from bs4 import BeautifulSoup


def _cell_text(cell):
    """Return the text of a table cell with newlines removed and ends stripped."""
    return cell.text.replace('\n', '').strip()


# Parse inside a ``with`` block so the file handle is closed as soon as the
# document has been read (the original left the handle open).
with open('bhai.html', 'r') as src:
    soup = BeautifulSoup(src, 'lxml')

table = soup.find("table", attrs={'class': 'stripe'})

# Column headings come from the <th> cells of the first header row only.
table_head = table.thead.find_all("tr")
heading = [_cell_text(th) for th in table_head[0].find_all("th")]

# One dict per body row, keyed by heading.  ``zip`` stops at the shorter
# sequence, so surplus cells or headings are silently ignored.
table_data = []
for tr in table.tbody.find_all("tr"):
    t_row = {th: _cell_text(td) for td, th in zip(tr.find_all("td"), heading)}
    table_data.append(t_row)

schools = []
for row in table_data:
    finalist = row['Finalist Name(s)']
    if 'School' in finalist:
        # Collapse runs of spaces before slicing so offsets are stable.
        s = re.sub(r' +', ' ', finalist)
        # Take the text between the first "(" and the first ")".
        inside_parens = s[s.find("(") + 1:s.find(")")]
        # Drop the first 8 characters -- presumably a fixed label such as
        # "School: " preceding the name; TODO confirm against the HTML.
        schools.append(inside_parens[8:])

df = pandas.DataFrame(schools)
# ``head`` must be called; printing the bare attribute shows the bound
# method's repr instead of the first rows.
print(df.head())

df.to_csv('mangla_learning.csv')