Created
August 20, 2025 17:51
-
-
Save siliconvallaeys/feeb51ecae25684a11d24cad868984bf to your computer and use it in GitHub Desktop.
Revisions
-
siliconvallaeys created this gist
Aug 20, 2025. There are no files selected for viewing
# NOTE (text carried over from the gist page this script was copied from):
# This file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review,
# open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Original file line number / Diff line number / Diff line change
# @@ -0,0 +1,38 @@
#
# Purpose: split an HTML fragment into {"owner", "task"} records. Every <b>
# element is treated as an owner heading; the content that follows it, up to
# the next sibling <b>, is broken into non-empty lines and each line becomes
# one task for that owner. The result is published as `output["items"]`.
#
# NOTE(review): `input_data` is not defined in this script; presumably it is
# injected by the Zapier "Code" step runtime -- confirm against the Zap setup.

from bs4 import BeautifulSoup
import re
from html import unescape

# Compiled once; the patterns themselves are unchanged from the original.
_BR_TAG_RE = re.compile(r'<\s*br\s*/?\s*>', re.IGNORECASE)
_ANY_TAG_RE = re.compile(r'<[^>]+>')
_LINE_BREAK_RE = re.compile(r'\r\n?|\u2028|\u2029')
_NON_WORD_RE = re.compile(r'\W+')

# A missing, None or empty 'items' value all collapse to the empty string.
html = input_data.get('items', '') or ''
# Decode HTML entities so escaped markup is parsed as real markup below.
html = unescape(html)


def normalize_lines(text: str):
    """Return the non-empty, stripped lines of *text*.

    Literal ``<br>`` variants are turned into newlines, any other remaining
    tag-like ``<...>`` span is removed, and CR, CRLF, U+2028 and U+2029 are
    all normalised to ``\\n`` before splitting.
    """
    text = _BR_TAG_RE.sub('\n', text)
    text = _ANY_TAG_RE.sub('', text)
    text = _LINE_BREAK_RE.sub('\n', text)
    stripped = (ln.strip() for ln in text.split('\n'))
    return [ln for ln in stripped if ln]


def normalize_cmp(s: str) -> str:
    """Return *s* lower-cased with every non-word character removed.

    Used only for loose equality checks; ``None`` or empty input gives ``''``.
    """
    return _NON_WORD_RE.sub('', s or '').lower()


def _text_after(bold_tag) -> str:
    """Collect the text of the siblings after *bold_tag* up to the next <b>."""
    parts = []
    for sib in bold_tag.next_siblings:
        tag_name = getattr(sib, 'name', None)
        if tag_name == 'b':
            # The next owner heading starts here.
            break
        if tag_name == 'br':
            # A parsed <br> has no text of its own, so get_text() would
            # return '' and the lines on either side of it would be glued
            # together. Emit the line break explicitly instead.
            parts.append('\n')
        elif hasattr(sib, 'get_text'):
            # Nested markup: separate its inner strings with newlines.
            parts.append(sib.get_text(separator='\n'))
        else:
            parts.append(str(sib))
    return ''.join(parts)


items = []
soup = BeautifulSoup(html, "html.parser")
for b in soup.find_all('b'):
    owner = b.get_text(strip=True)
    owner_norm = normalize_cmp(owner)
    for line in normalize_lines(_text_after(b)):
        # Skip a line that merely repeats the owner's name.
        if normalize_cmp(line) == owner_norm:
            continue
        items.append({"owner": owner, "task": line})

# Zapier will treat this list as multiple line items
output = {"items": items}