obar1 · July 30, 2025 06:34
diff --git a/py_fetch_skillboost.py b/py_fetch_skillboost.py
 import subprocess
 import sys
 import requests
 from pathlib import Path

 # Filename prefix for the saved HTML page; main() appends the path id and ".html".
 REPO = "paths_"
 # Location of the wkhtmltopdf executable that generate_pdf() checks for and runs.
 WKHTMLTOPDF_PATH = r"C:\git\py_fetch_skillboost\wkhtmltox\bin\wkhtmltopdf.exe"

 def fetch_and_save_html(path_id, input_path_html, timeout=30):
     """Fetch the Cloud Skills Boost partner page for a path and save it as HTML.

     The downloaded markup is embedded in a minimal HTML document that also
     links back to the original URL, then written to ``input_path_html`` as
     UTF-8.

     Args:
         path_id: Identifier interpolated into the ``/paths/{path_id}`` URL.
         input_path_html: Destination path of the HTML file.
         timeout: Seconds to wait for the server before giving up. Without a
             timeout ``requests.get`` can block indefinitely.

     Raises:
         Exception: If the page contains the reCAPTCHA notice; nothing is
             written in that case.

     HTTP and network failures (including timeouts) are printed, not raised.
     """
     url = f"https://partner.cloudskillsboost.google/paths/{path_id}"
     try:
         # A timeout surfaces as requests.exceptions.Timeout, which is a
         # RequestException and is therefore reported by the handler below.
         response = requests.get(url, timeout=timeout)
         response.raise_for_status()

         html_content = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <title>Saved Page {path_id}</title>
 </head>
 <body>
    <p>Original page: <a href="{url}" target="_blank">{url}</a></p>
    <hr>
    {response.text}
 </body>
 </html>"""

         # A reCAPTCHA interstitial means the real content was not served,
         # so abort before writing a useless file.
         if "This site is protected by reCAPTCHA and the Google" in html_content:
             raise Exception("Warning: Page may be protected by reCAPTCHA. PDF conversion might not work properly.")

         Path(input_path_html).write_text(html_content, encoding='utf-8')
         print(f"Page saved successfully as '{input_path_html}'")

     except requests.exceptions.HTTPError as http_err:
         # Raised by raise_for_status(), so `response` is always bound here.
         print(f"HTTP error occurred: {http_err} - Status code: {response.status_code}")
     except requests.exceptions.RequestException as err:
         print(f"Error fetching the page: {err}")


 def generate_pdf(input_path_html):
     """Convert a saved HTML file to PDF by running the wkhtmltopdf executable.

     The PDF path is the full input path with '.pdf' appended. Every problem
     (missing executable, failed conversion, anything unexpected) is printed
     rather than raised.
     """
     try:
         converter = Path(WKHTMLTOPDF_PATH)
         if converter.exists():
             pdf_target = input_path_html + '.pdf'
             command = [WKHTMLTOPDF_PATH, input_path_html, pdf_target]
             # check=True turns a non-zero exit status into CalledProcessError.
             subprocess.run(command, check=True)
             print(f"PDF saved to: {pdf_target}")
         else:
             print(f"wkhtmltopdf not found at: {WKHTMLTOPDF_PATH}")
     except subprocess.CalledProcessError as failure:
         print(f"PDF generation failed: {failure}")
     except Exception as problem:
         print(f"Unexpected error: {problem}")

 def main():
     """Command-line entry point: fetch one path page and convert it to PDF.

     Expects exactly one argument, the integer path id. The HTML file name is
     REPO + id + ".html". Exits with status 1 when the argument is missing,
     is not an integer, or when fetching/converting raises.
     """
     if len(sys.argv) != 2:
         print("Pass an id please")
         sys.exit(1)

     raw_id = sys.argv[1]
     try:
         path_id = int(raw_id)
     except ValueError:
         # Report bad input clearly instead of echoing int()'s raw message
         # and exiting with a success status.
         print(f"Invalid id '{raw_id}': expected an integer")
         sys.exit(1)

     input_path_html = f"{REPO}{path_id}.html"
     try:
         fetch_and_save_html(path_id, input_path_html)
         generate_pdf(input_path_html)
     except Exception as e:
         # Top-level boundary: report the problem and signal failure to the shell.
         print(e)
         sys.exit(1)

 # Run the command-line entry point only when executed as a script, not on import.
 if __name__ == "__main__":
    main()
	import subprocess
	import sys
	import requests
	from pathlib import Path

	# Filename prefix for the saved HTML page; main() appends the path id and ".html".
	REPO = "paths_"
	# Location of the wkhtmltopdf executable that generate_pdf() checks for and runs.
	WKHTMLTOPDF_PATH = r"C:\git\py_fetch_skillboost\wkhtmltox\bin\wkhtmltopdf.exe"

	def fetch_and_save_html(path_id, input_path_html, timeout=30):
	    """Fetch the Cloud Skills Boost partner page for a path and save it as HTML.

	    The downloaded markup is embedded in a minimal HTML document that also
	    links back to the original URL, then written to ``input_path_html`` as
	    UTF-8.

	    Args:
	        path_id: Identifier interpolated into the ``/paths/{path_id}`` URL.
	        input_path_html: Destination path of the HTML file.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout ``requests.get`` can block indefinitely.

	    Raises:
	        Exception: If the page contains the reCAPTCHA notice; nothing is
	            written in that case.

	    HTTP and network failures (including timeouts) are printed, not raised.
	    """
	    url = f"https://partner.cloudskillsboost.google/paths/{path_id}"
	    try:
	        # A timeout surfaces as requests.exceptions.Timeout, which is a
	        # RequestException and is therefore reported by the handler below.
	        response = requests.get(url, timeout=timeout)
	        response.raise_for_status()

	        html_content = f"""<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<title>Saved Page {path_id}</title>
	</head>
	<body>
	<p>Original page: <a href="{url}" target="_blank">{url}</a></p>
	<hr>
	{response.text}
	</body>
	</html>"""

	        # A reCAPTCHA interstitial means the real content was not served,
	        # so abort before writing a useless file.
	        if "This site is protected by reCAPTCHA and the Google" in html_content:
	            raise Exception("Warning: Page may be protected by reCAPTCHA. PDF conversion might not work properly.")

	        Path(input_path_html).write_text(html_content, encoding='utf-8')
	        print(f"Page saved successfully as '{input_path_html}'")

	    except requests.exceptions.HTTPError as http_err:
	        # Raised by raise_for_status(), so `response` is always bound here.
	        print(f"HTTP error occurred: {http_err} - Status code: {response.status_code}")
	    except requests.exceptions.RequestException as err:
	        print(f"Error fetching the page: {err}")


	def generate_pdf(input_path_html):
	    """Convert a saved HTML file to PDF by running the wkhtmltopdf executable.

	    The PDF path is the full input path with '.pdf' appended. Every problem
	    (missing executable, failed conversion, anything unexpected) is printed
	    rather than raised.
	    """
	    try:
	        if not Path(WKHTMLTOPDF_PATH).exists():
	            print(f"wkhtmltopdf not found at: {WKHTMLTOPDF_PATH}")
	            return

	        output_pdf_path = input_path_html + '.pdf'
	        # check=True turns a non-zero exit status into CalledProcessError.
	        subprocess.run([WKHTMLTOPDF_PATH, input_path_html, output_pdf_path], check=True)
	        print(f"PDF saved to: {output_pdf_path}")
	    except subprocess.CalledProcessError as e:
	        print(f"PDF generation failed: {e}")
	    except Exception as e:
	        print(f"Unexpected error: {e}")

	def main():
	    """Command-line entry point: fetch one path page and convert it to PDF.

	    Expects exactly one argument, the integer path id. The HTML file name is
	    REPO + id + ".html". Exits with status 1 when the argument is missing,
	    is not an integer, or when fetching/converting raises.
	    """
	    if len(sys.argv) != 2:
	        print("Pass an id please")
	        sys.exit(1)

	    raw_id = sys.argv[1]
	    try:
	        path_id = int(raw_id)
	    except ValueError:
	        # Report bad input clearly instead of echoing int()'s raw message
	        # and exiting with a success status.
	        print(f"Invalid id '{raw_id}': expected an integer")
	        sys.exit(1)

	    input_path_html = f"{REPO}{path_id}.html"
	    try:
	        fetch_and_save_html(path_id, input_path_html)
	        generate_pdf(input_path_html)
	    except Exception as e:
	        # Top-level boundary: report the problem and signal failure to the shell.
	        print(e)
	        sys.exit(1)

	# Run the command-line entry point only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()
No results found