macloo · February 22, 2024 18:54 · Feb 22, 2024
diff --git a/scraper_boilerplate.py b/scraper_boilerplate.py
@@ -0,0 +1,20 @@
+# Boilerplate for fetching one web page and parsing it with BeautifulSoup.
+# Fill in your own user-agent string and target URL before running.
+from bs4 import BeautifulSoup
+import requests
+
+# Request headers sent with the GET below; replace the placeholder value.
+# Find YOUR user-agent here: https://www.whatismybrowser.com/detect/what-is-my-user-agent/
+hdr = {'User-Agent': 'your user-agent info here'}
+
+url = 'https://www.some_domain.com/some_dir'
+# Without a timeout, requests can wait indefinitely on an unresponsive server.
+page = requests.get(url, headers=hdr, timeout=10)
+# Raise requests.HTTPError on a 4xx/5xx reply instead of parsing an error page.
+page.raise_for_status()
+soup = BeautifulSoup(page.text, 'html.parser')
+
+'''
+If you have a list of URLs to scrape, you need to loop over the list and
+make a new page and soup each time the loop runs.
+'''