Created
September 27, 2025 08:29
-
-
Save eriksachse/43ea634b02fc1902d1ed1ecf20864aaf to your computer and use it in GitHub Desktop.
Revisions
-
eriksachse created this gist
Sep 27, 2025. There are no files selected for viewing.
# me - this DAT.
# webClientDAT - The connected Web Client DAT
# statusCode - The status code of the response, formatted as a dictionary with two key-value pairs: 'code', 'message'.
# headerDict - The header of the response from the server formatted as a dictionary. Only sent once when streaming.
# data - The data of the response
# id - The request's unique identifier
#
# NOTE(review): `op` and `debug` are not defined in this file; they are
# expected to be supplied by the host environment (presumably TouchDesigner
# globals) - confirm before running this script anywhere else.

import codecs
import re
from html.parser import HTMLParser

# Encoding used when the response does not declare a usable charset.
_DEFAULT_ENCODING = 'utf-8'


def onConnect(webClientDAT, id):
    """Connection callback; intentionally does nothing."""
    return


def onDisconnect(webClientDAT, id):
    """Disconnection callback; intentionally does nothing."""
    return


class ImageParser(HTMLParser):
    """Collect the ``src`` of every sufficiently large ``<img>`` tag.

    After ``feed(html)`` the accepted URLs are available, in document order,
    in ``image_urls``. An image is rejected when it has no ``src``, when its
    path ends in ``.svg``, when its ``src`` contains ``'Searchtool'``, or
    when its declared ``width`` or ``height`` is below ``MIN_SIZE``.
    Missing or non-pixel dimensions (``100%``, ``auto``) count as 0, so such
    images are rejected as well.
    """

    # Smallest declared width/height (in pixels) an image may have.
    MIN_SIZE = 50

    # Accepts "64", "64px", "64.5" and surrounding whitespace; nothing else.
    _DIMENSION = re.compile(r'\s*\+?(\d+)(?:\.\d+)?\s*(?:px)?\s*$', re.IGNORECASE)

    def __init__(self):
        super().__init__()
        self.image_urls = []

    @classmethod
    def _pixels(cls, value):
        """Return *value* as a whole number of pixels, or 0 if not parseable."""
        if value is None:
            # Attribute absent, or present without a value (``<img width>``).
            return 0
        match = cls._DIMENSION.match(value)
        return int(match.group(1)) if match else 0

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` of an ``<img>`` tag that passes all filters."""
        if tag != "img":
            return

        attr_dict = dict(attrs)

        # A valueless ``src`` attribute is reported as None by HTMLParser.
        src = attr_dict.get('src') or ''
        if not src:
            return

        # 1. Skip .svg icons (ignoring case, query string and fragment).
        path = src.split('?', 1)[0].split('#', 1)[0]
        if path.lower().endswith('.svg') or 'Searchtool' in src:
            return

        # 2. Skip small images.
        width = self._pixels(attr_dict.get('width'))
        height = self._pixels(attr_dict.get('height'))
        if width < self.MIN_SIZE or height < self.MIN_SIZE:
            return

        self.image_urls.append(src)


def _decode_body(headerDict, data):
    """Decode the response bytes using the charset from the Content-Type header.

    The header name and the ``charset`` parameter are matched
    case-insensitively; surrounding quotes and other parameters are ignored.
    A missing or unknown charset falls back to UTF-8, and undecodable bytes
    are replaced with U+FFFD rather than raising.
    """
    content_type = ''
    for key, value in (headerDict or {}).items():
        if str(key).lower() == 'content-type':
            content_type = str(value)
            break

    charset = _DEFAULT_ENCODING
    for param in content_type.split(';'):
        name, _, value = param.partition('=')
        if name.strip().lower() == 'charset':
            charset = value.strip().strip('"\'')
            break

    try:
        codecs.lookup(charset)
    except LookupError:
        # Unknown or empty charset label: fall back instead of crashing.
        charset = _DEFAULT_ENCODING

    return data.decode(charset, errors='replace')


def onResponse(webClientDAT, statusCode, headerDict, data, id):
    """Extract image URLs from an HTML response into the 'image_urls' table.

    The response body is decoded, parsed with ``ImageParser``, and every
    accepted URL is written as one single-cell row after the table has been
    cleared.
    """
    # Dump the raw body first so it is available even if decoding fails.
    debug(data)

    # data is byte data, so decode it to a string
    html = _decode_body(headerDict, data)

    # parse the html to get the image urls
    parser = ImageParser()
    parser.feed(html)

    # write the image urls to a table
    table = op('image_urls')
    table.clear()
    for url in parser.image_urls:
        # One cell per row; pass a list so the URL is never split up.
        table.appendRow([url])