Created
October 7, 2025 14:31
-
-
Save gubatron/fc48992eec564c0a30cbab896ec678f3 to your computer and use it in GitHub Desktop.
Revisions
-
gubatron created this gist
Oct 7, 2025. There are no files selected for viewing.
#!/usr/bin/env bash
#
# Fetch a newsletter page, extract up to five distinct story links (the first
# anchor inside each <h2> heading), and run `uninews` on every link, printing
# a separator between consecutive outputs.
#
# Usage:    <this script> <newsletter_url>
# Requires: curl, python3 (or python), and uninews available in PATH.
# Exit:     1 on bad usage, a missing dependency, or when no story URL is
#           found. Because of `set -e`, a failing curl or uninews call also
#           aborts the script with that command's status.
set -euo pipefail

if [ "$#" -ne 1 ]; then
  echo "Usage: $(basename "$0") <newsletter_url>" >&2
  exit 1
fi

NEWSLETTER_URL="$1"

# --- Dependency checks -------------------------------------------------------

if ! command -v curl >/dev/null 2>&1; then
  echo "Error: curl is required but not installed." >&2
  exit 1
fi

# Prefer python3; fall back to a bare `python` interpreter when that is all
# that is installed.
if command -v python3 >/dev/null 2>&1; then
  PYTHON="python3"
elif command -v python >/dev/null 2>&1; then
  PYTHON="python"
else
  echo "Error: python3 or python is required but not installed." >&2
  exit 1
fi

if ! command -v uninews >/dev/null 2>&1; then
  echo "Error: uninews command is required but not available in PATH." >&2
  exit 1
fi

# --- Download ----------------------------------------------------------------

# The page is saved to a temporary file that is removed on any exit path.
tmpfile=$(mktemp)
trap 'rm -f "$tmpfile"' EXIT

# -f: fail on HTTP errors, -sS: quiet but still report errors, -L: follow
# redirects.
curl -fsSL "$NEWSLETTER_URL" -o "$tmpfile"

# --- Extract story URLs ------------------------------------------------------

# The embedded Python program prints one URL per line; the loop collects the
# lines into the story_urls array. The heredoc delimiter is quoted ('PY') so
# the shell does not expand anything inside the Python source.
story_urls=()
while IFS= read -r line; do
  story_urls+=("$line")
done < <("$PYTHON" - "$tmpfile" <<'PY'
import re
import sys
from html import unescape
from pathlib import Path

# Decode explicitly as UTF-8 instead of relying on the locale's default
# encoding; undecodable bytes are replaced rather than aborting extraction.
page = Path(sys.argv[1]).read_text(encoding="utf-8", errors="replace")

# Matches an <h2 ...> immediately followed (whitespace allowed) by an <a ...>
# tag and captures the value of its double-quoted href attribute.
pattern = re.compile(r'<h2[^>]*>\s*<a[^>]+href="([^"]+)"', re.IGNORECASE)

seen = set()
urls = []
for match in pattern.finditer(page):
    # Attribute values are HTML-escaped in the page source (e.g. "&amp;" in a
    # query string); decode them so the real URL is emitted and so that
    # duplicates differing only in escaping are detected.
    url = unescape(match.group(1)).strip()
    if not url or url in seen:
        continue
    seen.add(url)
    urls.append(url)
    # Only the first five distinct links are wanted.
    if len(urls) == 5:
        break

for url in urls:
    print(url)
PY
)

if [ "${#story_urls[@]}" -eq 0 ]; then
  echo "Error: Unable to find any story URLs in the provided newsletter." >&2
  exit 1
fi

# Fewer than five links is not fatal; warn and process what was found.
if [ "${#story_urls[@]}" -lt 5 ]; then
  printf 'Warning: only found %d story URLs in the newsletter.\n' "${#story_urls[@]}" >&2
fi

# --- Process each story ------------------------------------------------------

separator=$'\n\n=============\n\n'

for idx in "${!story_urls[@]}"; do
  url="${story_urls[$idx]}"
  # Print the separator between outputs only, never before the first one.
  if [ "$idx" -ne 0 ]; then
    printf '%s' "$separator"
  fi
  uninews "$url"
done