Last active
July 30, 2025 06:34
-
-
Save obar1/a6a7c656def69cf1179807f4303b345b to your computer and use it in GitHub Desktop.
Simple Skills Boost HTML page downloader (saves a learning-path page and converts it to PDF)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import subprocess
import sys
from pathlib import Path

import requests
# Filename prefix for saved pages: main() writes "<REPO><path_id>.html",
# relative to the current working directory.
REPO = "paths_"

# Location of the wkhtmltopdf executable. A non-empty WKHTMLTOPDF_PATH
# environment variable overrides the built-in default, so the script is not
# tied to one machine's install location.
WKHTMLTOPDF_PATH = os.environ.get("WKHTMLTOPDF_PATH") or (
    r"C:\git\py_fetch_skillboost\wkhtmltox\bin\wkhtmltopdf.exe"
)
def fetch_and_save_html(path_id, input_path_html, timeout=30):
    """Download a Skills Boost path page and save it inside a small HTML wrapper.

    The wrapper adds a title and a link back to the original URL above the
    fetched markup.

    Args:
        path_id: Identifier interpolated into the ``/paths/<path_id>`` URL.
        input_path_html: Destination file; written as UTF-8.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Raises:
        Exception: If the fetched page contains the reCAPTCHA notice. Nothing
            is written in that case.

    HTTP and other request errors are printed and not re-raised; the function
    then returns without writing the file.
    """
    url = f"https://partner.cloudskillsboost.google/paths/{path_id}"

    # Only the network calls sit inside the try, so the handlers below cannot
    # mask unrelated failures.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        # Read the status from the exception itself: `response` is unbound if
        # the error was raised before the assignment completed.
        status_code = getattr(http_err.response, "status_code", None)
        print(f"HTTP error occurred: {http_err} - Status code: {status_code}")
        return
    except requests.exceptions.RequestException as err:
        print(f"Error fetching the page: {err}")
        return

    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Saved Page {path_id}</title>
</head>
<body>
<p>Original page: <a href="{url}" target="_blank">{url}</a></p>
<hr>
{response.text}
</body>
</html>"""

    # A reCAPTCHA interstitial means the real content was not served; abort
    # before writing so no misleading file is left behind.
    if "This site is protected by reCAPTCHA and the Google" in html_content:
        raise Exception("Warning: Page may be protected by reCAPTCHA. PDF conversion might not work properly.")

    Path(input_path_html).write_text(html_content, encoding='utf-8')
    print(f"Page saved successfully as '{input_path_html}'")
def generate_pdf(input_path_html):
    """Render a saved HTML file to PDF by invoking wkhtmltopdf.

    The output path is the input path with ``.pdf`` appended. A missing
    converter, a failed conversion and any other error are reported with
    ``print``; nothing is raised to the caller.

    Args:
        input_path_html: Path string of the HTML file to convert.
    """
    try:
        converter_found = Path(WKHTMLTOPDF_PATH).exists()
        if converter_found:
            pdf_destination = input_path_html + '.pdf'
            command = [WKHTMLTOPDF_PATH, input_path_html, pdf_destination]
            # check=True turns a non-zero exit status into CalledProcessError.
            subprocess.run(command, check=True)
            print(f"PDF saved to: {pdf_destination}")
        else:
            print(f"wkhtmltopdf not found at: {WKHTMLTOPDF_PATH}")
    except subprocess.CalledProcessError as conversion_error:
        print(f"PDF generation failed: {conversion_error}")
    except Exception as unexpected:
        print(f"Unexpected error: {unexpected}")
def main():
    """Command-line entry point: fetch one path page and convert it to PDF.

    Expects exactly one command-line argument, an integer path id. The page
    is saved as ``<REPO><path_id>.html`` and then passed to ``generate_pdf``.

    Exits with status 1 when the argument count is wrong, when the id is not
    an integer, or when fetching/converting raises, so shells and calling
    scripts can detect the failure.
    """
    if len(sys.argv) != 2:
        print("Pass an id please")
        sys.exit(1)

    raw_id = sys.argv[1]
    try:
        path_id = int(raw_id)
    except ValueError:
        # Say what was wrong instead of surfacing int()'s raw message.
        print(f"Invalid id {raw_id!r}: expected an integer")
        sys.exit(1)

    input_path_html = f"{REPO}{path_id}.html"
    try:
        fetch_and_save_html(path_id, input_path_html)
        generate_pdf(input_path_html)
    except Exception as e:
        print(e)
        sys.exit(1)
# Run the downloader only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment