"""Download a Cloud Skills Boost learning-path page and convert it to PDF.

Usage: run the script with a single integer path id as its only argument.
The page is saved as ``paths_<id>.html`` in the current directory and then
rendered to ``paths_<id>.html.pdf`` with wkhtmltopdf.
"""

import subprocess
import sys
from pathlib import Path

import requests

REPO = "paths_"
WKHTMLTOPDF_PATH = r"C:\git\py_fetch_skillboost\wkhtmltox\bin\wkhtmltopdf.exe"

# Seconds to wait for the server; without a timeout requests.get() can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30


def fetch_and_save_html(path_id, input_path_html, timeout=REQUEST_TIMEOUT) -> bool:
    """Fetch the page for *path_id* and write it to *input_path_html*.

    Args:
        path_id: Identifier interpolated into the learning-path URL.
        input_path_html: Destination file (``str`` or ``Path``) for the page.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True when the page was downloaded and written, False when the
        request failed (the failure is reported on stdout).

    Raises:
        RuntimeError: If the downloaded page contains the reCAPTCHA notice.
        OSError: If the destination file cannot be written.
    """
    url = f"https://partner.cloudskillsboost.google/paths/{path_id}"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        # Read the status from the exception itself: the local `response`
        # is only bound when the error came from raise_for_status().
        status_code = getattr(http_err.response, "status_code", "unknown")
        print(f"HTTP error occurred: {http_err} - Status code: {status_code}")
        return False
    except requests.exceptions.RequestException as err:
        # Covers connection errors and timeouts as well.
        print(f"Error fetching the page: {err}")
        return False

    # Header prepended to the page so the saved file records its origin.
    html_content = (
        f" Saved Page {path_id}\n"
        "\n"
        f"Original page: {url}\n"
        "\n"
        "\n"
        f"{response.text} "
    )

    if "This site is protected by reCAPTCHA and the Google" in html_content:
        # RuntimeError is still an Exception, so existing handlers keep working.
        raise RuntimeError(
            "Warning: Page may be protected by reCAPTCHA. "
            "PDF conversion might not work properly."
        )

    Path(input_path_html).write_text(html_content, encoding='utf-8')
    print(f"Page saved successfully as '{input_path_html}'")
    return True


def generate_pdf(input_path_html, wkhtmltopdf_path=None) -> bool:
    """Convert *input_path_html* to a PDF next to it using wkhtmltopdf.

    The output file name is the input path with ``.pdf`` appended.

    Args:
        input_path_html: Path (``str`` or ``Path``) of the HTML file to convert.
        wkhtmltopdf_path: Executable to run; defaults to ``WKHTMLTOPDF_PATH``.

    Returns:
        True when the PDF was produced, False otherwise. Failures are
        reported on stdout and never raised.
    """
    # Resolve the default at call time so changes to the module constant apply.
    executable = WKHTMLTOPDF_PATH if wkhtmltopdf_path is None else wkhtmltopdf_path

    try:
        if not Path(executable).exists():
            print(f"wkhtmltopdf not found at: {executable}")
            return False

        # An f-string rather than `+` so Path arguments work as well as str.
        output_pdf_path = f"{input_path_html}.pdf"
        subprocess.run(
            [str(executable), str(input_path_html), output_pdf_path],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"PDF generation failed: {e}")
        return False
    except Exception as e:
        # Deliberate best-effort boundary: this function reports, never raises.
        print(f"Unexpected error: {e}")
        return False

    print(f"PDF saved to: {output_pdf_path}")
    return True


def main() -> None:
    """Parse the path id from the command line, then fetch and convert the page.

    Exits with status 1 on a usage error or when any step fails.
    """
    if len(sys.argv) != 2:
        print("Pass an id please")
        sys.exit(1)

    raw_id = sys.argv[1]
    try:
        path_id = int(raw_id)
    except ValueError:
        print(f"Path id must be an integer, got: {raw_id!r}")
        sys.exit(1)

    input_path_html = f"{REPO}{path_id}.html"

    try:
        # Only convert when the download succeeded; otherwise wkhtmltopdf
        # would run against a missing or stale HTML file.
        succeeded = fetch_and_save_html(path_id, input_path_html) and generate_pdf(
            input_path_html
        )
    except Exception as e:
        # Top-level boundary: report the problem and signal failure below.
        print(e)
        succeeded = False

    if not succeeded:
        sys.exit(1)


if __name__ == "__main__":
    main()