import os
import subprocess
import sys
from pathlib import Path

import requests
# Prefix of the HTML file name written for each fetched path id
# (the file is named f"{REPO}{path_id}.html", relative to the working directory).
REPO = "paths_"

# Location of the wkhtmltopdf executable. The WKHTMLTOPDF_PATH environment
# variable, when set to a non-empty value, overrides the built-in default so
# the script can run on machines where the tool is installed elsewhere.
WKHTMLTOPDF_PATH = (
    os.environ.get("WKHTMLTOPDF_PATH")
    or r"C:\git\py_fetch_skillboost\wkhtmltox\bin\wkhtmltopdf.exe"
)
def fetch_and_save_html(path_id, input_path_html, timeout=30):
    """Download the page for ``path_id`` and save it as an HTML file.

    The page at ``https://partner.cloudskillsboost.google/paths/{path_id}``
    is fetched and written to ``input_path_html`` as UTF-8, preceded by a
    short header naming the path id and the original URL.

    Args:
        path_id: Identifier interpolated into the page URL.
        input_path_html: Destination path of the HTML file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely.

    Returns:
        True if the page was saved; False if the request failed (the error
        is printed and no file is written).

    Raises:
        RuntimeError: If the downloaded content contains the reCAPTCHA
            notice, in which case nothing is written.
    """
    url = f"https://partner.cloudskillsboost.google/paths/{path_id}"

    # Keep the try body limited to the calls that can raise request errors.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        # Read the status from the exception itself: the local `response`
        # is unbound if the error was raised before `get` returned.
        status_code = getattr(http_err.response, "status_code", None)
        print(f"HTTP error occurred: {http_err} - Status code: {status_code}")
        return False
    except requests.exceptions.RequestException as err:
        print(f"Error fetching the page: {err}")
        return False

    html_content = (
        f"\nSaved Page {path_id}\nOriginal page: {url}\n{response.text}\n"
    )
    if "This site is protected by reCAPTCHA and the Google" in html_content:
        raise RuntimeError("Warning: Page may be protected by reCAPTCHA. PDF conversion might not work properly.")

    Path(input_path_html).write_text(html_content, encoding='utf-8')
    print(f"Page saved successfully as '{input_path_html}'")
    return True
def generate_pdf(input_path_html):
    """Convert a saved HTML file to PDF using the wkhtmltopdf executable.

    The PDF path is formed by appending ``.pdf`` to ``input_path_html``.
    Problems are reported on stdout instead of being raised to the caller.

    Args:
        input_path_html: Path string of the HTML file to convert.

    Returns:
        None.
    """
    try:
        converter = Path(WKHTMLTOPDF_PATH)
        if converter.exists():
            pdf_target = input_path_html + '.pdf'
            command = [WKHTMLTOPDF_PATH, input_path_html, pdf_target]
            # check=True turns a non-zero exit status into CalledProcessError.
            subprocess.run(command, check=True)
            print(f"PDF saved to: {pdf_target}")
        else:
            print(f"wkhtmltopdf not found at: {WKHTMLTOPDF_PATH}")
    except subprocess.CalledProcessError as failure:
        print(f"PDF generation failed: {failure}")
    except Exception as problem:
        print(f"Unexpected error: {problem}")
def main():
    """Command-line entry point: fetch one path page and convert it to PDF.

    Expects exactly one command-line argument, an integer path id. The page
    is saved as ``f"{REPO}{path_id}.html"`` and then passed to
    ``generate_pdf``.

    Raises:
        SystemExit: With status 1 when the argument count is wrong, the id
            is not an integer, no HTML file is available to convert, or an
            unexpected error occurs.
    """
    if len(sys.argv) != 2:
        print("Pass an id please")
        sys.exit(1)

    raw_id = sys.argv[1]
    try:
        path_id = int(raw_id)
    except ValueError:
        print(f"Invalid id {raw_id!r}: expected an integer")
        sys.exit(1)

    input_path_html = f"{REPO}{path_id}.html"
    try:
        fetch_and_save_html(path_id, input_path_html)
        # The fetch step reports request failures by printing only, so make
        # sure there is actually a file to convert before invoking the tool.
        if not Path(input_path_html).is_file():
            print(f"No HTML file at '{input_path_html}'; skipping PDF generation")
            sys.exit(1)
        generate_pdf(input_path_html)
    except Exception as e:
        # Top-level boundary: report the problem and signal failure to the
        # shell (SystemExit is not an Exception, so it passes through).
        print(e)
        sys.exit(1)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()