eriksachse · October 5, 2025 06:57
diff --git a/webclient.py b/webclient.py
 from html.parser import HTMLParser

 class ImageParser(HTMLParser):
    """Collect ``[url, caption]`` rows from ``wp-caption`` blocks in HTML.

    A block starts at a ``<div>`` whose ``class`` contains ``wp-caption`` and
    ends at the matching ``</div>``.  Inside it, the last acceptable ``<img>``
    supplies the URL and the text of a ``<p>`` whose ``class`` contains
    ``wp-caption-text`` supplies the caption.  Finished rows are appended to
    ``image_rows``; blocks without an acceptable image produce no row.

    Images are rejected when their ``src`` ends in ``.svg`` or contains
    ``Searchtool``, or when both dimensions are known and either one is
    smaller than ``MIN_IMAGE_SIZE``.
    """

    # Images with both dimensions known and either below this are skipped.
    MIN_IMAGE_SIZE = 50

    def __init__(self):
        super().__init__()
        self.image_rows = []  # list of [url, caption]
        self.current_img = None
        self.in_caption_div = False
        self.in_caption_p = False
        self.current_caption = ''
        # Number of currently open <div> tags counted from the wp-caption div
        # itself, so a nested </div> does not end the caption block early.
        self._div_depth = 0

    @staticmethod
    def _to_int(value):
        """Return *value* as an int, or 0 if it is missing or not an integer.

        Width/height attributes may be absent, valueless (``None``) or hold
        non-numeric text such as ``100%`` or ``auto``; all of those count as
        "unknown" (0) instead of raising.
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def handle_starttag(self, tag, attrs):
        """Track entry into caption divs, their images and caption paragraphs."""
        attr_dict = dict(attrs)
        # A valueless attribute (e.g. a bare ``class``) is reported as None.
        css_class = attr_dict.get('class') or ''

        if tag == "div":
            if self.in_caption_div:
                # Nested div: count it so its </div> is not mistaken for the
                # end of the caption block.
                self._div_depth += 1
            elif 'wp-caption' in css_class:
                self.in_caption_div = True
                self._div_depth = 1
                self.current_img = None
                self.current_caption = ''
            return

        if not self.in_caption_div:
            return

        if tag == "img":
            src = attr_dict.get('src') or ''
            if src.endswith('.svg') or 'Searchtool' in src:
                return
            width = self._to_int(attr_dict.get('width'))
            height = self._to_int(attr_dict.get('height'))
            too_small = width < self.MIN_IMAGE_SIZE or height < self.MIN_IMAGE_SIZE
            # Only filter on size when both dimensions are actually known.
            if width and height and too_small:
                return
            self.current_img = src
        elif tag == "p" and 'wp-caption-text' in css_class:
            self.in_caption_p = True
            self.current_caption = ''

    def handle_endtag(self, tag):
        """Close caption paragraphs and emit a row when the caption div ends."""
        if tag == "p" and self.in_caption_p:
            self.in_caption_p = False

        if tag == "div" and self.in_caption_div:
            self._div_depth -= 1
            if self._div_depth > 0:
                return  # closed a nested div; still inside the caption block
            if self.current_img:  # store row [img, caption]
                self.image_rows.append([self.current_img, self.current_caption.strip()])
            self.in_caption_div = False
            # An unclosed caption <p> must not keep swallowing later text.
            self.in_caption_p = False
            self.current_img = None
            self.current_caption = ''

    def handle_data(self, data):
        """Accumulate text while inside a caption paragraph."""
        if self.in_caption_p:
            self.current_caption += data


 def onResponse(webClientDAT, statusCode, headerDict, data, id):
    """Decode an HTML response and write its captioned images to a table.

    The body is decoded using the charset named in the Content-Type header
    (UTF-8 when none is given), parsed with ``ImageParser``, and every
    ``[url, caption]`` row is appended to the ``image_urls`` operator after
    clearing it.

    Args:
        webClientDAT: unused here.
        statusCode: unused here.
        headerDict: mapping of response header names to values.
        data: response body; must support ``.decode`` (i.e. bytes).
        id: unused here (name fixed by the callback signature).
    """
    # Header names are case-insensitive, so do not rely on a lowercase key.
    content_type = ''
    for key, value in headerDict.items():
        if str(key).lower() == 'content-type':
            content_type = str(value or '')
            break

    # Pick out just the charset parameter; other parameters, quotes and
    # surrounding whitespace must not end up in the codec name.
    charset = 'utf-8'
    for part in content_type.split(';'):
        name, _, value = part.partition('=')
        value = value.strip().strip('"\'')
        if name.strip().lower() == 'charset' and value:
            charset = value
            break

    try:
        html = data.decode(charset)
    except (LookupError, UnicodeDecodeError):
        # Unknown codec name or mislabelled body: salvage what we can
        # instead of aborting the whole callback.
        html = data.decode('utf-8', errors='replace')

    parser = ImageParser()
    parser.feed(html)

    # NOTE(review): ``op`` is not defined in this file; presumably supplied by
    # the host environment (looks like TouchDesigner) - confirm.
    table = op('image_urls')
    table.clear()

    # write URL + caption to table
    for row in parser.image_rows:
        table.appendRow(row)
	from html.parser import HTMLParser

	class ImageParser(HTMLParser):
		"""Collect ``[url, caption]`` rows from ``wp-caption`` blocks in HTML.

		A block starts at a ``<div>`` whose ``class`` contains ``wp-caption`` and
		ends at the matching ``</div>``.  Inside it, the last acceptable ``<img>``
		supplies the URL and the text of a ``<p>`` whose ``class`` contains
		``wp-caption-text`` supplies the caption.  Finished rows are appended to
		``image_rows``; blocks without an acceptable image produce no row.

		Images are rejected when their ``src`` ends in ``.svg`` or contains
		``Searchtool``, or when both dimensions are known and either one is
		smaller than ``MIN_IMAGE_SIZE``.
		"""

		# Images with both dimensions known and either below this are skipped.
		MIN_IMAGE_SIZE = 50

		def __init__(self):
			super().__init__()
			self.image_rows = []  # list of [url, caption]
			self.current_img = None
			self.in_caption_div = False
			self.in_caption_p = False
			self.current_caption = ''
			# Number of currently open <div> tags counted from the wp-caption div
			# itself, so a nested </div> does not end the caption block early.
			self._div_depth = 0

		@staticmethod
		def _to_int(value):
			"""Return *value* as an int, or 0 if it is missing or not an integer.

			Width/height attributes may be absent, valueless (``None``) or hold
			non-numeric text such as ``100%`` or ``auto``; all of those count as
			"unknown" (0) instead of raising.
			"""
			try:
				return int(value)
			except (TypeError, ValueError):
				return 0

		def handle_starttag(self, tag, attrs):
			"""Track entry into caption divs, their images and caption paragraphs."""
			attr_dict = dict(attrs)
			# A valueless attribute (e.g. a bare ``class``) is reported as None.
			css_class = attr_dict.get('class') or ''

			if tag == "div":
				if self.in_caption_div:
					# Nested div: count it so its </div> is not mistaken for the
					# end of the caption block.
					self._div_depth += 1
				elif 'wp-caption' in css_class:
					self.in_caption_div = True
					self._div_depth = 1
					self.current_img = None
					self.current_caption = ''
				return

			if not self.in_caption_div:
				return

			if tag == "img":
				src = attr_dict.get('src') or ''
				if src.endswith('.svg') or 'Searchtool' in src:
					return
				width = self._to_int(attr_dict.get('width'))
				height = self._to_int(attr_dict.get('height'))
				too_small = width < self.MIN_IMAGE_SIZE or height < self.MIN_IMAGE_SIZE
				# Only filter on size when both dimensions are actually known.
				if width and height and too_small:
					return
				self.current_img = src
			elif tag == "p" and 'wp-caption-text' in css_class:
				self.in_caption_p = True
				self.current_caption = ''

		def handle_endtag(self, tag):
			"""Close caption paragraphs and emit a row when the caption div ends."""
			if tag == "p" and self.in_caption_p:
				self.in_caption_p = False

			if tag == "div" and self.in_caption_div:
				self._div_depth -= 1
				if self._div_depth > 0:
					return  # closed a nested div; still inside the caption block
				if self.current_img:  # store row [img, caption]
					self.image_rows.append([self.current_img, self.current_caption.strip()])
				self.in_caption_div = False
				# An unclosed caption <p> must not keep swallowing later text.
				self.in_caption_p = False
				self.current_img = None
				self.current_caption = ''

		def handle_data(self, data):
			"""Accumulate text while inside a caption paragraph."""
			if self.in_caption_p:
				self.current_caption += data


	def onResponse(webClientDAT, statusCode, headerDict, data, id):
		"""Decode an HTML response and write its captioned images to a table.

		The body is decoded using the charset named in the Content-Type header
		(UTF-8 when none is given), parsed with ``ImageParser``, and every
		``[url, caption]`` row is appended to the ``image_urls`` operator after
		clearing it.

		Args:
			webClientDAT: unused here.
			statusCode: unused here.
			headerDict: mapping of response header names to values.
			data: response body; must support ``.decode`` (i.e. bytes).
			id: unused here (name fixed by the callback signature).
		"""
		# Header names are case-insensitive, so do not rely on a lowercase key.
		content_type = ''
		for key, value in headerDict.items():
			if str(key).lower() == 'content-type':
				content_type = str(value or '')
				break

		# Pick out just the charset parameter; other parameters, quotes and
		# surrounding whitespace must not end up in the codec name.
		charset = 'utf-8'
		for part in content_type.split(';'):
			name, _, value = part.partition('=')
			value = value.strip().strip('"\'')
			if name.strip().lower() == 'charset' and value:
				charset = value
				break

		try:
			html = data.decode(charset)
		except (LookupError, UnicodeDecodeError):
			# Unknown codec name or mislabelled body: salvage what we can
			# instead of aborting the whole callback.
			html = data.decode('utf-8', errors='replace')

		parser = ImageParser()
		parser.feed(html)

		# NOTE(review): ``op`` is not defined in this file; presumably supplied by
		# the host environment (looks like TouchDesigner) - confirm.
		table = op('image_urls')
		table.clear()

		# write URL + caption to table
		for row in parser.image_rows:
			table.appendRow(row)