-
-
Save tinyapps/df2b6757a142ff93caf9c63d0ef38b11 to your computer and use it in GitHub Desktop.
Revisions
-
tinyapps revised this gist
Jul 14, 2024 . 1 changed file with 9 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -108,6 +108,15 @@ def wrap_html(data, top_level=True): .comment-text { white-space: pre-wrap; /* Preserve whitespace and line breaks */ } @media (prefers-color-scheme: dark) { body { background-color: #121212; color: #e0e0e0; } .comment-box { border-color: #444; } } </style> ''' meta = '<meta charset="UTF-8">' -
tinyapps revised this gist
Jul 14, 2024 . 1 changed file with 75 additions and 55 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,42 +1,35 @@ #!/usr/bin/env python3 """ Forked and modified from pukkandan/ytdlp_nest_comments.py: https://gist.github.com/pukkandan/ee737fec64822f2552caf3ca4cbf5db7 which included this license and copyright information: "SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT Copyright © 2021 [email protected]" Convert YouTube comments from an info.json file (acquired via yt-dlp --write-comments) to HTML. """ import os.path import json import argparse import logging from datetime import datetime import html # Configure logging logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') def get_fields(dct): for name, fn in FIELDS.items(): val = fn(dct, name) if val is not None: yield name, val def filter_func(comments): return [dict(get_fields(c)) for c in comments] FIELDS = { 'text': dict.get, 'author': dict.get, @@ -46,7 +39,6 @@ def filter_func(comments): 'replies': lambda dct, name: filter_func(dct.get(name, [])) or None } parser = argparse.ArgumentParser() parser.add_argument( '--input-file', '-i', @@ -55,52 +47,80 @@ def filter_func(comments): parser.add_argument( '--output-file', '-o', dest='outputfile', metavar='FILE', required=True, help='File to write comments to (html)') args = parser.parse_args() ext = os.path.splitext(args.outputfile)[1][1:] if ext != 'html': raise SystemExit(f'ERROR: Only html format is supported, not {ext}') logging.info('Reading file') try: with open(args.inputfile, encoding='utf-8') as f: info_dict = json.load(f) except FileNotFoundError: logging.error(f'File {args.inputfile} not found') raise except json.JSONDecodeError: logging.error(f'Error decoding JSON from file {args.inputfile}') raise 
# Index every comment by id, oldest first, so that replies are attached to
# their parent in chronological order.
comment_data = {c['id']: c for c in sorted(
    info_dict['comments'], key=lambda c: c.get('timestamp') or 0)}
count = len(info_dict['comments'])

# Build the reply tree: top-level comments go into ``nested_comments``, every
# other comment is appended to its parent's ``replies`` list.
nested_comments = []
for i, c in enumerate(comment_data.values(), 1):
    logging.info('Processing comment %d/%d', i, count)
    parent_id = c.get('parent', 'root')
    parent_comment = comment_data.get(parent_id)
    if parent_id == 'root':
        siblings = nested_comments
    elif parent_comment is None:
        # The parent is absent from the dump; keep the reply visible at the
        # top level instead of aborting the whole conversion with a KeyError.
        logging.warning(
            'Parent %s of comment %s not found; treating it as top-level',
            parent_id, c['id'])
        siblings = nested_comments
    else:
        siblings = parent_comment.setdefault('replies', [])
    siblings.append(c)

nested_comments = filter_func(nested_comments)

logging.info('Converting to html')


def wrap_html(data, top_level=True):
    """Render a list of nested comment dicts as an HTML ``<ul>`` tree.

    Args:
        data: List of comment dicts as produced by ``filter_func``. Each dict
            may contain ``author``, ``text``, ``timestamp`` (already formatted
            as a string) and ``replies`` (a list of the same shape). Any of
            these keys may be missing, because ``filter_func`` drops fields
            whose value is ``None``.
        top_level: When true, the list is wrapped in the ``comments``
            container and prefixed with the charset ``<meta>`` tag and the
            stylesheet. Recursive calls for replies pass ``False``.

    Returns:
        The HTML markup as a string.
    """
    parts = ['<ul>']
    for comment in data:
        # All user-controlled values are escaped before being interpolated.
        author = html.escape(comment.get("author", "Anonymous"))
        # ``text`` can be absent (see Args), so do not index it directly.
        text = html.escape(comment.get("text", "")).replace('\n', '<br>')  # Convert newlines to <br>
        timestamp = html.escape(comment.get("timestamp", ""))

        parts.append('<li><div class="comment-box">')
        # A <div> is not allowed inside a <p>, so the author paragraph is
        # closed before the comment text container starts.
        parts.append(f'<p><strong>{author}:</strong></p>')
        parts.append(f'<div class="comment-text">{text}</div>')
        if timestamp:
            parts.append(f'<p><small>{timestamp}</small></p>')
        replies = comment.get('replies')
        if replies:
            parts.append(wrap_html(replies, top_level=False))
        parts.append('</div></li>')
    parts.append('</ul>')
    html_content = ''.join(parts)

    if not top_level:
        return html_content

    style = '''
    <style>
        .comment-box {
            border: 1px solid #ccc;
            padding: 10px;
        }
        .comments ul {
            list-style-type: none;
            padding-left: 20px;
        }
        .comment-text {
            white-space: pre-wrap; /* Preserve whitespace and line breaks */
        }
    </style>
    '''
    meta = '<meta charset="UTF-8">'
    return f'{meta}{style}<div class="comments">{html_content}</div>'


out = wrap_html(nested_comments)

logging.info('Writing file')
try:
    with open(args.outputfile, 'w', encoding='utf-8') as f:
        f.write(out)
    logging.info('Done')
except OSError as e:
    logging.error(f'Error writing to file {args.outputfile}: {e}')
    raise
pukkandan revised this gist
Jun 22, 2023 . 1 changed file with 14 additions and 10 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -32,6 +32,7 @@ def get_fields(dct): if val is not None: yield name, val def filter_func(comments): return [dict(get_fields(c)) for c in comments] @@ -41,6 +42,7 @@ def filter_func(comments): 'author': dict.get, 'timestamp': lambda dct, name: dct.get(name) and datetime.strftime( datetime.utcfromtimestamp(dct.get(name)), '%Y/%m/%d'), # Add more fields here 'replies': lambda dct, name: filter_func(dct.get(name, [])) or None } @@ -57,6 +59,16 @@ def filter_func(comments): args = parser.parse_args() ext = os.path.splitext(args.outputfile)[1][1:] if ext == 'html': # Error early try: from json2html import json2html except ImportError: raise SystemExit('ERROR: json2html is required for html output. Install it with pip install json2html') elif ext != 'json': raise SystemExit(f'ERROR: Only json and html formats are supported, not {ext}') print('Reading file') with open(args.inputfile, encoding='utf-8') as f: info_dict = json.load(f) @@ -70,11 +82,11 @@ def filter_func(comments): print(f'Processing comment {i}/{count}', end='\r') parent = nested_comments if c['parent'] == 'root' else comment_data[c['parent']].setdefault('replies', []) parent.append(c) del parent print('') nested_comments = filter_func(nested_comments) if ext == 'json': @@ -83,19 +95,11 @@ def filter_func(comments): elif ext == 'html': print('Converting to html') out = json2html.convert(nested_comments) del nested_comments print('Writing file') with open(args.outputfile, 'w', encoding='utf-8') as f: f.write(out) -
pukkandan revised this gist
Jun 21, 2023 . 1 changed file with 32 additions and 7 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,7 +20,7 @@ }, ...] """ import os.path import json import argparse from datetime import datetime @@ -49,13 +49,14 @@ def filter_func(comments): parser.add_argument( '--input-file', '-i', dest='inputfile', metavar='FILE', required=True, help='File to read video metadata from (info.json)') parser.add_argument( '--output-file', '-o', dest='outputfile', metavar='FILE', required=True, help='File to write comments to (json / html)') args = parser.parse_args() print('Reading file') with open(args.inputfile, encoding='utf-8') as f: info_dict = json.load(f) @@ -70,8 +71,32 @@ def filter_func(comments): parent = nested_comments if c['parent'] == 'root' else comment_data[c['parent']].setdefault('replies', []) parent.append(c) print('') nested_comments = filter_func(nested_comments) ext = os.path.splitext(args.outputfile)[1][1:] if ext == 'json': print('Converting to json') out = json.dumps(nested_comments, indent=4, ensure_ascii=False) elif ext == 'html': try: from json2html import json2html except ImportError: raise SystemExit('\nERROR: json2html is required for html output. Install it with pip install json2html') print('Converting to html') out = json2html.convert(nested_comments) else: raise SystemExit(f'\nERROR: Only json and html formats are supported, not {ext}') print('Writing file') with open(args.outputfile, 'w', encoding='utf-8') as f: f.write(out) print('Done') -
pukkandan revised this gist
Oct 12, 2022 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -57,7 +57,7 @@ def filter_func(comments): args = parser.parse_args() print('Reading file') with open(args.inputfile, encoding='utf-8') as f: info_dict = json.load(f) comment_data = {c['id']: c for c in sorted( -
pukkandan revised this gist
Jul 6, 2022 . 1 changed file with 2 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,5 @@ #!/usr/bin/env python3 """ SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT Copyright © 2021 [email protected] -
pukkandan renamed this gist
Dec 26, 2021 . 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
pukkandan created this gist
Dec 26, 2021 . There are no files selected for viewing
"""
SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
Copyright © 2021 [email protected]

* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
* Change FIELDS according to your needs

The output file will be in the format:
[{
    'text': 'comment 1',
    ...
    'replies': [{
        'text': 'reply 1',
        ...
        'replies': [...],
    }, ...],
}, ...]
"""

import json
import argparse
from datetime import datetime, timezone


def get_fields(dct):
    """Yield ``(name, value)`` for each FIELDS entry whose extractor returns a non-None value."""
    for name, fn in FIELDS.items():
        val = fn(dct, name)
        if val is not None:
            yield name, val


def filter_func(comments):
    """Return a new list with every comment dict reduced to the fields in FIELDS."""
    return [dict(get_fields(c)) for c in comments]


def _format_timestamp(dct, name):
    """Return ``dct[name]`` (a Unix timestamp) as a UTC ``YYYY/MM/DD`` string, or None if unset."""
    ts = dct.get(name)
    # Compare against None explicitly: a truthiness test would let a
    # timestamp of 0 through as a raw integer instead of a formatted date.
    if ts is None:
        return None
    return datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y/%m/%d')


# Maps each output field name to an extractor called as ``fn(comment, name)``.
# An extractor returning None causes the field to be left out of the output.
FIELDS = {
    'text': dict.get,
    'author': dict.get,
    'timestamp': _format_timestamp,
    # ``or []`` also covers an explicit ``"replies": null`` in the input.
    'replies': lambda dct, name: filter_func(dct.get(name) or []) or None,
}


def nest_comments(comments):
    """Turn yt-dlp's flat comment list into a tree.

    Args:
        comments: List of comment dicts, each with an ``id`` and a ``parent``
            that is either ``'root'`` or the id of another comment. The dicts
            are mutated: a ``replies`` list is added to every comment that
            has replies.

    Returns:
        The list of top-level comments, oldest first. A reply whose parent id
        is not present in ``comments`` is kept as a top-level comment rather
        than raising ``KeyError``.
    """
    comment_data = {c['id']: c for c in sorted(
        comments, key=lambda c: c.get('timestamp') or 0)}
    count = len(comments)

    nested_comments = []
    for i, c in enumerate(comment_data.values(), 1):
        print(f'Processing comment {i}/{count}', end='\r')
        parent = comment_data.get(c.get('parent', 'root'))
        if parent is None:
            # Either a genuine top-level comment ('root') or an orphaned reply.
            nested_comments.append(c)
        else:
            parent.setdefault('replies', []).append(c)
    return nested_comments


def main(argv=None):
    """Command-line entry point: read an info.json and write the nested comments as JSON.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: If the input file contains no ``comments`` entry.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input-file', '-i',
        dest='inputfile', metavar='FILE', required=True,
        help='File to read info_dict from')
    parser.add_argument(
        '--output-file', '-o',
        dest='outputfile', metavar='FILE', required=True,
        help='File to write comments to')
    args = parser.parse_args(argv)

    print('Reading file')
    # Read explicitly as UTF-8; the platform default encoding can fail on
    # non-ASCII comment text.
    with open(args.inputfile, encoding='utf-8') as f:
        info_dict = json.load(f)

    comments = info_dict.get('comments')
    if comments is None:
        raise SystemExit(
            'ERROR: No comments found in the input file. '
            'Was it written with yt-dlp --write-comments?')
    del info_dict

    nested_comments = nest_comments(comments)

    print('\nWriting file')
    with open(args.outputfile, 'w', encoding='utf-8') as f:
        json.dump(filter_func(nested_comments), f, indent=4, ensure_ascii=False)

    print('Done')


if __name__ == '__main__':
    main()