#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scrape the precinct results page at results.thecaucuses.org to JSON.

Fetches the page, locates the ``precinct-results`` section, and prints a
nested mapping to stdout, serialized as JSON:

    {county: {precinct: {candidate: {part: value}}}}

where ``value`` is the cell text parsed as a ``Decimal`` after removing
thousands separators.
"""
from decimal import Decimal
from itertools import zip_longest, chain, repeat

import httpx
import ujson as json
from bs4 import BeautifulSoup as bs
# Don't you dare judge me.

iowa_req = httpx.get('https://results.thecaucuses.org')
# Fail loudly on a non-success response rather than handing an error page to
# the parser, which would only surface later as an AttributeError on None.
iowa_req.raise_for_status()

# NOTE: attrs must be a dict. A set literal ({'class', 'precinct-results'})
# is interpreted by Beautiful Soup as a list of acceptable CSS classes, which
# happens to match but would also accept an element with class="class".
results = bs(iowa_req.text, 'lxml').find('section', {'class': 'precinct-results'})

# Candidate names: every third <li> of the header row, starting at index 2.
# NOTE(review): presumably the cells in between are spacers that let each
# name span its sub-columns -- confirm against the page markup.
candidates = [
    el.text
    for el in results.find('ul', {'class': 'thead'}).find_all('li')[2::3]
]

# Sub-column labels: every <li> of the sub-header row from index 2 onwards.
parts = [
    el.text
    for el in results.find('ul', {'class': 'sub-head'}).find_all('li')[2:]
]

# One (candidate, part) pair per data column. Each candidate name is repeated
# three times, so this assumes exactly three sub-columns per candidate; zip()
# stops at the shorter of the two sequences.
keys = list(zip(chain.from_iterable(repeat(c, 3) for c in candidates), parts))


def unpack(candidates, precinct):
    """Return one precinct row as ``{candidate: {part: Decimal}}``.

    Args:
        candidates: Iterable of candidate names; each gets an entry in the
            result, which stays empty if no column maps to it.
        precinct: Parsed ``<ul>`` element for one precinct row. Its first
            ``<li>`` is skipped (the caller reads it as the precinct name);
            the remaining cells are matched positionally against the
            module-level ``keys``.

    Returns:
        A dict keyed by candidate name whose values map each sub-column
        label to the cell value parsed as ``Decimal``.

    Raises:
        decimal.InvalidOperation: If a cell's text is not a valid number
            once commas have been removed.
        KeyError: If ``keys`` names a candidate absent from ``candidates``.
    """
    dct = {c: {} for c in candidates}
    cells = precinct.find_all('li')[1:]
    # zip() truncates to the shorter input, so surplus cells (or surplus
    # keys) are silently ignored, exactly as before.
    for (candidate, part), cell in zip(keys, cells):
        # Strip thousands separators so e.g. "1,234" parses.
        dct[candidate][part] = Decimal(cell.text.replace(',', ''))
    return dct


# {county name: {precinct name: unpacked row}} for every county block found
# in the results section.
counties = {
    county.find('div', {'class': 'precinct-county'}).text: {
        precinct.find('li').text: unpack(candidates, precinct)
        for precinct in county.find('div', {'class': 'precinct-data'}).find_all('ul')
    }
    for county in results.find_all('div', {'class': 'precinct-rows'})
}

print(json.dumps(counties))