Skip to content

Instantly share code, notes, and snippets.

@eriksachse
Created September 27, 2025 08:29
Show Gist options
  • Save eriksachse/43ea634b02fc1902d1ed1ecf20864aaf to your computer and use it in GitHub Desktop.
Save eriksachse/43ea634b02fc1902d1ed1ecf20864aaf to your computer and use it in GitHub Desktop.
For tiktok tutorial
# me - this DAT.
# webClientDAT - The connected Web Client DAT
# statusCode - The status code of the response, formatted as a dictionary with two key-value pairs: 'code', 'message'.
# headerDict - The header of the response from the server formatted as a dictionary. Only sent once when streaming.
# data - The data of the response
# id - The request's unique identifier
def onConnect(webClientDAT, id):
    """Connect callback for the Web Client DAT; intentionally a no-op.

    Args:
        webClientDAT: The connected Web Client DAT.
        id: The request's unique identifier.
    """
    return None
def onDisconnect(webClientDAT, id):
    """Disconnect callback for the Web Client DAT; intentionally a no-op.

    Args:
        webClientDAT: The connected Web Client DAT.
        id: The request's unique identifier.
    """
    return None
from html.parser import HTMLParser
class ImageParser(HTMLParser):
    """Collect the ``src`` of every sufficiently large ``<img>`` tag.

    Feed HTML text with ``feed()``; the accepted URLs accumulate, in
    document order, in ``image_urls``.

    An image is skipped when:
      * it has no ``src`` (or an empty one),
      * its ``src`` ends with ``.svg`` or contains ``Searchtool``
        (presumably a site-specific icon -- confirm against the target page),
      * its ``width`` or ``height`` attribute is missing, not a plain
        integer, or smaller than ``MIN_DIMENSION``.
    """

    # Images narrower or shorter than this many pixels are ignored.
    MIN_DIMENSION = 50

    def __init__(self):
        super().__init__()
        # URLs of the accepted images, in the order they were encountered.
        self.image_urls = []

    @staticmethod
    def _dimension(value):
        """Return *value* as an int, or 0 when it is not a plain integer.

        Real-world markup carries values such as ``"100%"``, ``"120px"`` or
        a valueless attribute (which the parser reports as ``None``); a bare
        ``int()`` would raise on those and abort the whole parse.
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` of an ``<img>`` tag that passes the filters.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None``
                for attributes written without a value.
        """
        if tag != "img":
            return

        attr_dict = dict(attrs)

        # 1. Skip images without a usable URL, .svg icons and Searchtool icons.
        #    ``or ''`` also covers a valueless ``src`` attribute (None).
        src = attr_dict.get('src') or ''
        if not src or src.endswith('.svg') or 'Searchtool' in src:
            return

        # 2. Skip small images (missing/unparseable dimensions count as 0).
        width = self._dimension(attr_dict.get('width', 0))
        height = self._dimension(attr_dict.get('height', 0))
        if width < self.MIN_DIMENSION or height < self.MIN_DIMENSION:
            return

        self.image_urls.append(src)
def _charset_from_headers(headerDict, default='utf-8'):
    """Return the charset named in the Content-Type header, or *default*."""
    for key, value in (headerDict or {}).items():
        # HTTP header names are case-insensitive.
        if str(key).lower() != 'content-type':
            continue
        # e.g. 'text/html; charset="UTF-8"; boundary=x' -> 'UTF-8'
        for param in str(value).split(';')[1:]:
            name, _, charset = param.partition('=')
            if name.strip().lower() == 'charset':
                charset = charset.strip().strip('"\'')
                if charset:
                    return charset
    return default


def onResponse(webClientDAT, statusCode, headerDict, data, id):
    """Extract image URLs from an HTML response into the 'image_urls' table.

    The response body is decoded using the charset from the Content-Type
    header (UTF-8 when absent or unknown), parsed with ``ImageParser``, and
    each collected URL is appended as a row to the operator ``image_urls``
    after clearing it.

    Args:
        webClientDAT: The connected Web Client DAT.
        statusCode: Dict with 'code' and 'message' of the response.
        headerDict: Response headers as a dictionary.
        data: The response body as bytes.
        id: The request's unique identifier.
    """
    # get the encoding from the header
    encoding = _charset_from_headers(headerDict)

    # data is byte data, so decode it to a string; undecodable bytes are
    # replaced rather than aborting the whole callback, and an unknown
    # charset name falls back to UTF-8.
    try:
        html = data.decode(encoding, errors='replace')
    except LookupError:
        html = data.decode('utf-8', errors='replace')

    # parse the html to get the image urls
    parser = ImageParser()
    parser.feed(html)

    # write the image urls to a table
    table = op('image_urls')
    table.clear()
    debug(data)
    for url in parser.image_urls:
        table.appendRow(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment