Created
April 14, 2025 11:56
-
-
Save waveform80/80c48f59bb5dbd0bd9e3ebadcd6233a2 to your computer and use it in GitHub Desktop.
Revisions
-
waveform80 created this gist
Apr 14, 2025 .There are no files selected for viewing
#!/usr/bin/python3
"""Extract revisions of a Discourse post on discourse.ubuntu.com.

An extremely hacky, quickly thrown together script to extract various
revisions of a specific Discourse post on discourse.ubuntu.com. Uses the
"markdown diff" to extract the "current" revision and dumps them to
individual markdown files.

Does Discourse actually *have* an API for this? I couldn't find it...
"""

import sys
import json
from pathlib import Path
from urllib.request import urlopen
from html.parser import HTMLParser


class TableParser(HTMLParser):
    """Collect the text of one column of an HTML table.

    The parser is a small state machine with three states:

    * ``'top'``   -- outside any table
    * ``'table'`` -- inside ``<table>`` but not inside a row
    * ``'tr'``    -- inside a ``<tr>``

    Columns are counted from 1 by incrementing a counter on every ``<td>``
    seen while in the ``'tr'`` state. Text is appended to :attr:`content`
    only while a ``<td>`` of the requested column is open, so whitespace
    between cells or rows and any text following ``</table>`` is ignored.

    Opening a new top-level ``<table>`` resets :attr:`content`.

    :param column: 1-based index of the column whose text is collected.
    """

    def __init__(self, column):
        super().__init__()
        self.extract_column = column
        self.current_col = 0
        self.state = 'top'
        self.content = ''
        # True only between a <td> and the tag that ends it; without this
        # flag, text outside the cell would be attributed to the last
        # column that was counted.
        self.in_cell = False

    def handle_starttag(self, tag, attrs):
        """Advance the state machine on ``<table>``, ``<tr>`` and ``<td>``."""
        if tag == 'table' and self.state == 'top':
            self.content = ''
            self.state = 'table'
        elif tag == 'tr' and self.state == 'table':
            self.state = 'tr'
            self.current_col = 0
            self.in_cell = False
        elif tag == 'td' and self.state == 'tr':
            self.current_col += 1
            self.in_cell = True

    def handle_endtag(self, tag):
        """Leave the current cell, row or table on the matching end tag."""
        if tag == 'td' and self.state == 'tr':
            self.in_cell = False
        elif tag == 'tr' and self.state == 'tr':
            self.state = 'table'
            # Covers rows whose last cell omitted its optional </td>.
            self.in_cell = False
        elif tag == 'table' and self.state == 'table':
            self.state = 'top'

    def handle_data(self, data):
        """Append *data* to :attr:`content` if it lies in the wanted cell."""
        if (
            self.state == 'tr'
            and self.in_cell
            and self.current_col == self.extract_column
        ):
            self.content += data


def main(post_id=120902, revisions=range(96, 107)):
    """Download post revisions and write each one to ``revision<N>.md``.

    For every revision number the JSON document at
    ``https://discourse.ubuntu.com/posts/<post_id>/revisions/<rev>.json`` is
    fetched, the HTML found under ``body_changes.side_by_side_markdown`` is
    parsed, and the text of its second table column is written to
    ``revision<rev>.md`` in the current working directory.

    NOTE(review): the second column is presumably the "current" side of the
    side-by-side diff (per the description above) -- confirm against a real
    response.

    :param post_id: numeric id of the Discourse post.
    :param revisions: iterable of revision numbers to fetch.
    :raises urllib.error.URLError: if a request fails.
    :raises KeyError: if the response lacks the expected keys.
    """
    for rev in revisions:
        url = (
            f'https://discourse.ubuntu.com/posts/{post_id}'
            f'/revisions/{rev}.json'
        )
        with urlopen(url) as fp:
            data = json.load(fp)
        changes = data['body_changes']['side_by_side_markdown']
        parser = TableParser(column=2)
        parser.feed(changes)
        # HTMLParser may hold back trailing text until close() is called.
        parser.close()
        # Be explicit about the encoding so the output does not depend on
        # the locale and non-ASCII markdown cannot fail to encode.
        Path(f'revision{rev}.md').write_text(parser.content, encoding='utf-8')


if __name__ == '__main__':
    sys.exit(main())