waveform80 · April 14, 2025 11:56 · Apr 14, 2025
diff --git a/README.md b/README.md
@@ -0,0 +1,6 @@
+An extremely hacky, quickly thrown together script to extract various
+revisions of a specific Discourse post on discourse.ubuntu.com. Uses the
+"markdown diff" to extract the "current" revision and dumps them to
+individual markdown files.
+
+Does Discourse actually *have* an API for this? I couldn't find it...
diff --git a/discourse_revs.py b/discourse_revs.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python3
+
+import sys
+import json
+from pathlib import Path
+from urllib.request import urlopen
+from html.parser import HTMLParser
+
+
+class TableParser(HTMLParser):
+    def __init__(self, column):
+        super().__init__()
+        self.extract_column = column
+        self.current_col = 0
+        self.state = 'top'
+        self.content = ''
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'table' and self.state == 'top':
+            self.content = ''
+            self.state = 'table'
+        elif tag == 'tr' and self.state == 'table':
+            self.state = 'tr'
+            self.current_col = 0
+        elif tag == 'td' and self.state == 'tr':
+            self.current_col += 1
+
+    def handle_endtag(self, tag):
+        if tag == 'tr' and self.state == 'tr':
+            self.state = 'table'
+        elif tag == 'table' and self.state == 'table':
+            self.state = 'top'
+
+    def handle_data(self, data):
+        if self.current_col == self.extract_column:
+            self.content += data
+
+
+def main():
+    for rev in range(96, 107):
+        parser = TableParser(column=2)
+        with urlopen(f'https://discourse.ubuntu.com/posts/120902/revisions/{rev}.json') as fp:
+            data = json.load(fp)
+            changes = data['body_changes']['side_by_side_markdown']
+            parser.feed(changes)
+            Path(f'revision{rev}.md').write_text(parser.content)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
No results found