Skip to content

Instantly share code, notes, and snippets.

@waveform80
Created April 14, 2025 11:56
Show Gist options
  • Select an option

  • Save waveform80/80c48f59bb5dbd0bd9e3ebadcd6233a2 to your computer and use it in GitHub Desktop.

Select an option

Save waveform80/80c48f59bb5dbd0bd9e3ebadcd6233a2 to your computer and use it in GitHub Desktop.

Revisions

  1. waveform80 created this gist Apr 14, 2025.
    6 changes: 6 additions & 0 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,6 @@
    An extremely hacky, quickly thrown together script to extract various
    revisions of a specific Discourse post on discourse.ubuntu.com. Uses the
    "markdown diff" to extract the "current" revision and dumps each one to
    an individual markdown file.

    Does Discourse actually *have* an API for this? I couldn't find it...
    50 changes: 50 additions & 0 deletions discourse_revs.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,50 @@
    #!/usr/bin/python3

    import sys
    import json
    from pathlib import Path
    from urllib.request import urlopen
    from html.parser import HTMLParser


    class TableParser(HTMLParser):
        """
        Collect the text found in one column of an HTML table.

        Columns are numbered from 1: the first ``<td>`` of each ``<tr>`` is
        column 1, the second is column 2, and so on. After feeding markup, the
        concatenated text of the selected column (across all rows) is available
        in :attr:`content`. Each new top-level ``<table>`` resets
        :attr:`content`, so only the last table fed is retained.

        Only text that appears inside the selected ``<td>`` element is
        collected; whitespace or text between cells, between rows, or after the
        table is ignored.

        :param column: 1-based index of the column whose text is extracted.
        """

        def __init__(self, column):
            super().__init__()
            self.extract_column = column
            self.current_col = 0
            # One of 'top' (outside any table), 'table' (inside a table but
            # outside a row) or 'tr' (inside a row).
            self.state = 'top'
            self.content = ''
            # True only between a <td> start tag and the matching end of the
            # cell; guards handle_data against text that sits outside cells.
            self._in_cell = False

        def handle_starttag(self, tag, attrs):
            """Advance the table/row/cell state machine on an opening tag."""
            if tag == 'table' and self.state == 'top':
                self.content = ''
                self.state = 'table'
            elif tag == 'tr' and self.state == 'table':
                self.state = 'tr'
                self.current_col = 0
                self._in_cell = False
            elif tag == 'td' and self.state == 'tr':
                self.current_col += 1
                self._in_cell = True

        def handle_endtag(self, tag):
            """Leave the current cell, row or table on a closing tag."""
            if tag == 'td' and self.state == 'tr':
                self._in_cell = False
            elif tag == 'tr' and self.state == 'tr':
                self.state = 'table'
                # A row may end without an explicit </td>.
                self._in_cell = False
            elif tag == 'table' and self.state == 'table':
                self.state = 'top'

        def handle_data(self, data):
            """Append *data* to ``content`` when inside the selected column."""
            # Checking the column number alone is not enough: current_col keeps
            # its last value after the cell (and the row, and the table) has
            # closed, which would let stray text leak into the result.
            if self._in_cell and self.current_col == self.extract_column:
                self.content += data


    def main(post_id=120902, revisions=range(96, 107)):
        """
        Download revisions of a Discourse post and save each as markdown.

        For every revision number in *revisions*, fetch the revision's JSON
        from discourse.ubuntu.com, take the side-by-side markdown diff table
        from ``body_changes.side_by_side_markdown``, extract the text of its
        second column and write it to ``revision<N>.md`` in the current
        working directory.

        :param post_id: numeric id of the Discourse post to fetch.
        :param revisions: iterable of revision numbers to download.
        :raises urllib.error.URLError: if a revision cannot be fetched.
        :raises KeyError: if the response lacks the expected diff fields.
        """
        for rev in revisions:
            # A fresh parser per revision: its state and content are per-table.
            # Column 2 is the right-hand side of the side-by-side diff
            # (presumably the text as of this revision; confirm against the
            # Discourse output).
            parser = TableParser(column=2)
            url = f'https://discourse.ubuntu.com/posts/{post_id}/revisions/{rev}.json'
            with urlopen(url) as fp:
                data = json.load(fp)
            changes = data['body_changes']['side_by_side_markdown']
            parser.feed(changes)
            # Flush any text the parser is still buffering.
            parser.close()
            # Write UTF-8 explicitly; the platform default encoding may not be
            # able to represent every character in the post.
            Path(f'revision{rev}.md').write_text(parser.content, encoding='utf-8')


    if __name__ == '__main__':
        # Run only when executed as a script; main()'s return value becomes
        # the process exit status.
        exit_status = main()
        sys.exit(exit_status)