eriksachse · September 27, 2025 08:29 · Sep 27, 2025
diff --git a/webscrape.py b/webscrape.py
@@ -0,0 +1,129 @@
+# me - this DAT.
+# webClientDAT - The connected Web Client DAT
+# statusCode - The status code of the response, formatted as a dictionary with two key-value pairs: 'code', 'message'.
+# headerDict - The header of the response from the server formatted as a dictionary. Only sent once when streaming.
+# data - The data of the response
+# id - The request's unique identifier
+
+import re
+from html.parser import HTMLParser
+
+# Images narrower or shorter than this many pixels are skipped as icons.
+MIN_IMAGE_SIZE = 50
+
+# Pixel lengths such as "120", "120.5" or "120px" (but not "50%" or "auto").
+_PIXELS_RE = re.compile(r"\s*(\d+(?:\.\d+)?)\s*(?:px)?\s*$", re.IGNORECASE)
+
+
+def onConnect(webClientDAT, id):
+    """Callback for an opened connection; intentionally does nothing."""
+    return
+
+
+def onDisconnect(webClientDAT, id):
+    """Callback for a closed connection; intentionally does nothing."""
+    return
+
+
+class ImageParser(HTMLParser):
+    """Collect the ``src`` of every large enough, non-icon ``<img>`` tag.
+
+    After ``feed()``, ``image_urls`` holds the accepted URLs in document
+    order. An image is accepted only if it has a non-empty ``src`` that is
+    neither an SVG nor a 'Searchtool' icon, and its ``width`` and ``height``
+    attributes are both at least ``MIN_IMAGE_SIZE`` pixels. Missing or
+    non-pixel dimensions (e.g. "100%") count as 0, so such images are
+    skipped.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.image_urls = []
+
+    @staticmethod
+    def _pixel_size(value):
+        """Return an attribute value as whole pixels, or 0 if it is not one."""
+        try:
+            return int(value)
+        except (TypeError, ValueError):
+            pass
+        # Tolerate values like "120px": one odd attribute must not abort
+        # parsing of the whole page.
+        match = _PIXELS_RE.match(value or "")
+        return int(float(match.group(1))) if match else 0
+
+    def handle_starttag(self, tag, attrs):
+        """Record the tag's ``src`` if it is an acceptable ``<img>``."""
+        if tag != "img":
+            return
+        attr_dict = dict(attrs)
+
+        # 1. Skip images without a source, .svg icons and Searchtool icons.
+        # A valueless attribute (<img src>) is reported as None.
+        src = attr_dict.get("src") or ""
+        if not src:
+            return
+        if src.split("?", 1)[0].lower().endswith(".svg") or "Searchtool" in src:
+            return
+
+        # 2. Skip small images.
+        width = self._pixel_size(attr_dict.get("width"))
+        height = self._pixel_size(attr_dict.get("height"))
+        if width < MIN_IMAGE_SIZE or height < MIN_IMAGE_SIZE:
+            return
+
+        self.image_urls.append(src)
+
+
+def _decode_body(headerDict, data):
+    """Decode a response body using the charset of its Content-Type header.
+
+    Falls back to UTF-8 when no charset is given or the named codec is
+    unknown; undecodable bytes are replaced instead of raising.
+    """
+    if isinstance(data, str):
+        # NOTE(review): data is expected to be bytes; text passes through.
+        return data
+
+    # Header names are case-insensitive, so do not rely on the key's casing.
+    content_type = ""
+    for key, value in (headerDict or {}).items():
+        if str(key).lower() == "content-type":
+            content_type = str(value)
+            break
+
+    encoding = "utf-8"
+    for param in content_type.split(";"):
+        name, _, value = param.partition("=")
+        value = value.strip().strip("\"'")
+        if name.strip().lower() == "charset" and value:
+            encoding = value
+            break
+
+    try:
+        return data.decode(encoding, errors="replace")
+    except LookupError:
+        return data.decode("utf-8", errors="replace")
+
+
+def onResponse(webClientDAT, statusCode, headerDict, data, id):
+    """Fill the 'image_urls' table with the image URLs found in the response.
+
+    The table is cleared first and then receives one row per accepted URL.
+    """
+    html = _decode_body(headerDict, data)
+
+    # parse the html to get the image urls
+    parser = ImageParser()
+    parser.feed(html)
+
+    # write the image urls to a table
+    table = op('image_urls')
+    if table is None:
+        debug("onResponse: no operator named 'image_urls' found")
+        return
+    table.clear()
+    # Log a short summary rather than dumping the whole response body.
+    debug(f"onResponse: {len(parser.image_urls)} image URL(s) found")
+    for url in parser.image_urls:
+        table.appendRow([url])