"""Render an exported survey response table through a Jinja template.

Usage: ``python <this script> RESPONSES_HTML``

The input HTML is expected to contain a table with a ``<thead>`` of ``<th>``
cells and a ``<tbody>`` of ``<tr>`` rows.  The first ``BREAK_AT`` columns are
treated as per-entry metadata, the remaining columns as question responses.
The result is rendered with ``template.jinja-html`` and written to
``out.html``; both live next to this script.
"""
import sys
from pathlib import Path

import jinja2
from bs4 import BeautifulSoup

here = Path(__file__).parent

BREAK_AT = 11  # Initial non response entries in the survey.


def _get_question(heading_cell):
    """Return a ``(label, title)`` pair for one header cell.

    When the cell contains a ``<div>``, the label is taken from its
    ``data-content`` attribute and the title from ``data-original-title``.
    A cell without a ``<div>`` yields its text and ``None``.
    """
    tag = heading_cell.div
    if tag is None:
        return (heading_cell.text, None)
    # Fall back instead of raising KeyError when an attribute is absent.
    label = tag.attrs.get("data-content", heading_cell.text)
    return (label, tag.attrs.get("data-original-title"))


def _parse_row(row, meta_keys):
    """Convert one ``<tr>`` into ``{"responses": [...], "meta": {...}}``.

    ``meta_keys`` holds the labels of the leading metadata columns, in column
    order; columns whose label is empty are skipped.
    """
    cells = row.find_all("td")
    responses = [cell.text for cell in cells[BREAK_AT:]]

    # Any non-empty cell text counts as an answer (whitespace included).
    meta = {"Answers Given": str(sum(1 for answer in responses if answer))}
    for key, value_cell in zip(meta_keys, cells[:BREAK_AT]):
        if not key:
            continue
        if key == "completed":
            # Completion is detected from the cell's markup, not its text:
            # the string "text-success" must appear somewhere in the cell
            # (presumably a CSS class on an icon -- confirm against export).
            meta[key] = "YES" if "text-success" in str(value_cell) else "NO"
        else:
            meta[key] = value_cell.text
    return {"responses": responses, "meta": meta}


def main():
    """Parse the HTML file named on the command line and write ``out.html``."""
    if len(sys.argv) < 2:
        sys.exit(f"usage: {Path(sys.argv[0]).name} RESPONSES_HTML")

    source = Path(sys.argv[1])
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    soup = BeautifulSoup(source.read_text(encoding="utf-8"), "lxml")

    head = soup.find("thead")
    body = soup.find("tbody")
    if head is None or body is None:
        sys.exit(f"{source}: no table with <thead> and <tbody> found")

    heading_cells = head.find_all("th")
    questions = [_get_question(cell) for cell in heading_cells[BREAK_AT:]]
    # The metadata labels are the same for every row; compute them once.
    meta_keys = [_get_question(cell)[0] for cell in heading_cells[:BREAK_AT]]
    entries = [_parse_row(row, meta_keys) for row in body.find_all("tr")]

    # NOTE(review): jinja2.Template does not autoescape by default, so the
    # response text reaches out.html unescaped unless the template escapes it
    # itself.  Confirm the template handles this before sharing the output.
    template = jinja2.Template(
        (here / "template.jinja-html").read_text(encoding="utf-8")
    )
    text = template.render(questions=questions, entries=entries)
    (here / "out.html").write_text(text, encoding="utf-8")


if __name__ == "__main__":
    main()