Skip to content

Instantly share code, notes, and snippets.

@tinyapps
Forked from pukkandan/ytdlp_nest_comments.py
Last active October 15, 2025 19:09
Show Gist options
  • Save tinyapps/df2b6757a142ff93caf9c63d0ef38b11 to your computer and use it in GitHub Desktop.
Save tinyapps/df2b6757a142ff93caf9c63d0ef38b11 to your computer and use it in GitHub Desktop.

Revisions

  1. tinyapps revised this gist Jul 14, 2024. 1 changed file with 9 additions and 0 deletions.
    9 changes: 9 additions & 0 deletions ytdlp_nest_comments.py
    Original file line number Diff line number Diff line change
    @@ -108,6 +108,15 @@ def wrap_html(data, top_level=True):
    .comment-text {
    white-space: pre-wrap; /* Preserve whitespace and line breaks */
    }
    @media (prefers-color-scheme: dark) {
    body {
    background-color: #121212;
    color: #e0e0e0;
    }
    .comment-box {
    border-color: #444;
    }
    }
    </style>
    '''
    meta = '<meta charset="UTF-8">'
  2. tinyapps revised this gist Jul 14, 2024. 1 changed file with 75 additions and 55 deletions.
    130 changes: 75 additions & 55 deletions ytdlp_nest_comments.py
    Original file line number Diff line number Diff line change
    @@ -1,42 +1,35 @@
    #!/usr/bin/env python3

    """
    SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
    Copyright © 2021 [email protected]
    * Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
    * Change FIELDS according to your needs
    The output file will be in the format:
    [{
    'text': 'comment 1',
    ...
    'replies': [{
    'text': 'reply 1',
    ...
    'replies': [...],
    }, ...],
    }, ...]
    Forked and modified from pukkandan/ytdlp_nest_comments.py:
    https://gist.github.com/pukkandan/ee737fec64822f2552caf3ca4cbf5db7
    which included this license and copyright information:
    "SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
    Copyright © 2021 [email protected]"
    Convert YouTube comments from an info.json file (acquired via
    yt-dlp --write-comments) to HTML.
    """

    import os.path
    import json
    import argparse
    import logging
    from datetime import datetime
    import html

    # Configure logging
    logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

    def get_fields(dct):
        """Yield ``(name, value)`` pairs for the entries of FIELDS that apply to *dct*.

        Each extractor in FIELDS is called as ``extractor(dct, name)``; pairs whose
        extracted value is None are left out.
        """
        for key in FIELDS:
            extracted = FIELDS[key](dct, key)
            if extracted is None:
                continue
            yield key, extracted


    def filter_func(comments):
        """Return one dict per comment, built from the pairs that get_fields yields."""
        filtered = []
        for comment in comments:
            filtered.append(dict(get_fields(comment)))
        return filtered


    FIELDS = {
    'text': dict.get,
    'author': dict.get,
    @@ -46,7 +39,6 @@ def filter_func(comments):
    'replies': lambda dct, name: filter_func(dct.get(name, [])) or None
    }


    parser = argparse.ArgumentParser()
    parser.add_argument(
    '--input-file', '-i',
    @@ -55,52 +47,80 @@ def filter_func(comments):
    parser.add_argument(
    '--output-file', '-o',
    dest='outputfile', metavar='FILE', required=True,
    help='File to write comments to (json / html)')
    help='File to write comments to (html)')
    args = parser.parse_args()


    ext = os.path.splitext(args.outputfile)[1][1:]
    if ext == 'html': # Error early
    try:
    from json2html import json2html
    except ImportError:
    raise SystemExit('ERROR: json2html is required for html output. Install it with pip install json2html')
    elif ext != 'json':
    raise SystemExit(f'ERROR: Only json and html formats are supported, not {ext}')


    print('Reading file')
    with open(args.inputfile, encoding='utf-8') as f:
    info_dict = json.load(f)
    if ext != 'html':
    raise SystemExit(f'ERROR: Only html format is supported, not {ext}')

    logging.info('Reading file')
    try:
    with open(args.inputfile, encoding='utf-8') as f:
    info_dict = json.load(f)
    except FileNotFoundError:
    logging.error(f'File {args.inputfile} not found')
    raise
    except json.JSONDecodeError:
    logging.error(f'Error decoding JSON from file {args.inputfile}')
    raise

    comment_data = {c['id']: c for c in sorted(
    info_dict['comments'], key=lambda c: c.get('timestamp') or 0)}
    count = len(info_dict['comments'])
    del info_dict
    nested_comments = []
    for i, (cid, c) in enumerate(comment_data.items(), 1):
    print(f'Processing comment {i}/{count}', end='\r')
    logging.info(f'Processing comment {i}/{count}')
    parent = nested_comments if c['parent'] == 'root' else comment_data[c['parent']].setdefault('replies', [])
    parent.append(c)
    del parent


    print('')
    nested_comments = filter_func(nested_comments)


    if ext == 'json':
    print('Converting to json')
    out = json.dumps(nested_comments, indent=4, ensure_ascii=False)


    elif ext == 'html':
    print('Converting to html')
    out = json2html.convert(nested_comments)


    del nested_comments
    print('Writing file')
    with open(args.outputfile, 'w', encoding='utf-8') as f:
    f.write(out)
    print('Done')
    logging.info('Converting to html')

    def _escape_field(value, default=''):
        """Return *value* HTML-escaped as text, using *default* when it is None."""
        return html.escape(default if value is None else str(value))


    def wrap_html(data, top_level=True):
        """Render nested comments as an HTML unordered list.

        Args:
            data: Iterable of comment dicts. The keys read are ``author``,
                ``text``, ``timestamp`` and ``replies`` (a list of dicts that
                is rendered recursively). Every key is optional.
            top_level: When true, the list is wrapped in
                ``<div class="comments">`` and prefixed with a charset
                ``<meta>`` tag and the stylesheet. Recursive calls for
                replies pass ``False`` so that wrapper is emitted only once.

        Returns:
            The generated markup as a single string.
        """
        # Collect fragments and join once instead of growing a string with +=.
        parts = ['<ul>']
        for comment in data:
            # Missing or None fields must not abort the whole export: a comment
            # without text used to raise KeyError, and a None author or a
            # numeric timestamp used to fail inside html.escape().
            author = _escape_field(comment.get('author'), 'Anonymous')
            text = _escape_field(comment.get('text')).replace('\n', '<br>')  # Convert newlines to <br>
            timestamp = _escape_field(comment.get('timestamp'))

            parts.append('<li><div class="comment-box">')
            # NOTE(review): a <div> nested inside a <p> is not valid HTML; the
            # markup is kept unchanged so existing output stays identical.
            parts.append(f'<p><strong>{author}:</strong> <div class="comment-text">{text}</div></p>')
            if timestamp:
                parts.append(f'<p><small>{timestamp}</small></p>')
            replies = comment.get('replies')
            if replies:
                parts.append(wrap_html(replies, top_level=False))
            parts.append('</div></li>')
        parts.append('</ul>')
        html_content = ''.join(parts)

        if not top_level:
            return html_content

        style = '''
        <style>
        .comment-box {
        border: 1px solid #ccc;
        padding: 10px;
        }
        .comments ul {
        list-style-type: none;
        padding-left: 20px;
        }
        .comment-text {
        white-space: pre-wrap; /* Preserve whitespace and line breaks */
        }
        </style>
        '''
        meta = '<meta charset="UTF-8">'
        return f'{meta}{style}<div class="comments">{html_content}</div>'

    # Render the whole comment tree into a single HTML string.
    out = wrap_html(nested_comments)

    logging.info('Writing file')
    try:
        # The file is written as UTF-8, matching the charset <meta> tag in the markup.
        with open(args.outputfile, mode='w', encoding='utf-8') as f:
            f.write(out)
        logging.info('Done')
    except IOError as e:
        # Log which path failed, then re-raise so the script still exits with an error.
        logging.error(f'Error writing to file {args.outputfile}: {e}')
        raise
  3. @pukkandan pukkandan revised this gist Jun 22, 2023. 1 changed file with 14 additions and 10 deletions.
    24 changes: 14 additions & 10 deletions ytdlp_nest_comments.py
    Original file line number Diff line number Diff line change
    @@ -32,6 +32,7 @@ def get_fields(dct):
    if val is not None:
    yield name, val


    def filter_func(comments):
        """Reduce every comment to a dict holding only the pairs get_fields yields for it."""
        reduced = []
        for entry in comments:
            reduced.append(dict(get_fields(entry)))
        return reduced

    @@ -41,6 +42,7 @@ def filter_func(comments):
    'author': dict.get,
    'timestamp': lambda dct, name: dct.get(name) and datetime.strftime(
    datetime.utcfromtimestamp(dct.get(name)), '%Y/%m/%d'),
    # Add more fields here
    'replies': lambda dct, name: filter_func(dct.get(name, [])) or None
    }

    @@ -57,6 +59,16 @@ def filter_func(comments):
    args = parser.parse_args()


    ext = os.path.splitext(args.outputfile)[1][1:]
    if ext == 'html': # Error early
    try:
    from json2html import json2html
    except ImportError:
    raise SystemExit('ERROR: json2html is required for html output. Install it with pip install json2html')
    elif ext != 'json':
    raise SystemExit(f'ERROR: Only json and html formats are supported, not {ext}')


    print('Reading file')
    with open(args.inputfile, encoding='utf-8') as f:
    info_dict = json.load(f)
    @@ -70,11 +82,11 @@ def filter_func(comments):
    print(f'Processing comment {i}/{count}', end='\r')
    parent = nested_comments if c['parent'] == 'root' else comment_data[c['parent']].setdefault('replies', [])
    parent.append(c)
    del parent


    print('')
    nested_comments = filter_func(nested_comments)
    ext = os.path.splitext(args.outputfile)[1][1:]


    if ext == 'json':
    @@ -83,19 +95,11 @@ def filter_func(comments):


    elif ext == 'html':
    try:
    from json2html import json2html
    except ImportError:
    raise SystemExit('\nERROR: json2html is required for html output. Install it with pip install json2html')

    print('Converting to html')
    out = json2html.convert(nested_comments)


    else:
    raise SystemExit(f'\nERROR: Only json and html formats are supported, not {ext}')


    del nested_comments
    print('Writing file')
    with open(args.outputfile, 'w', encoding='utf-8') as f:
    f.write(out)
  4. @pukkandan pukkandan revised this gist Jun 21, 2023. 1 changed file with 32 additions and 7 deletions.
    39 changes: 32 additions & 7 deletions ytdlp_nest_comments.py
    Original file line number Diff line number Diff line change
    @@ -20,7 +20,7 @@
    }, ...]
    """


    import os.path
    import json
    import argparse
    from datetime import datetime
    @@ -49,13 +49,14 @@ def filter_func(comments):
    parser.add_argument(
    '--input-file', '-i',
    dest='inputfile', metavar='FILE', required=True,
    help='File to read info_dict from')
    help='File to read video metadata from (info.json)')
    parser.add_argument(
    '--output-file', '-o',
    dest='outputfile', metavar='FILE', required=True,
    help='File to write comments to')
    help='File to write comments to (json / html)')
    args = parser.parse_args()


    print('Reading file')
    with open(args.inputfile, encoding='utf-8') as f:
    info_dict = json.load(f)
    @@ -70,8 +71,32 @@ def filter_func(comments):
    parent = nested_comments if c['parent'] == 'root' else comment_data[c['parent']].setdefault('replies', [])
    parent.append(c)

    print('\nWriting file')
    with open(args.outputfile, 'w', encoding='utf-8') as f:
    json.dump(filter_func(nested_comments), f, indent=4, ensure_ascii=False)

    print('Done')
    print('')
    nested_comments = filter_func(nested_comments)
    ext = os.path.splitext(args.outputfile)[1][1:]


    if ext == 'json':
    print('Converting to json')
    out = json.dumps(nested_comments, indent=4, ensure_ascii=False)


    elif ext == 'html':
    try:
    from json2html import json2html
    except ImportError:
    raise SystemExit('\nERROR: json2html is required for html output. Install it with pip install json2html')

    print('Converting to html')
    out = json2html.convert(nested_comments)


    else:
    raise SystemExit(f'\nERROR: Only json and html formats are supported, not {ext}')


    print('Writing file')
    with open(args.outputfile, 'w', encoding='utf-8') as f:
    f.write(out)
    print('Done')
  5. @pukkandan pukkandan revised this gist Oct 12, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion ytdlp_nest_comments.py
    Original file line number Diff line number Diff line change
    @@ -57,7 +57,7 @@ def filter_func(comments):
    args = parser.parse_args()

    print('Reading file')
    with open(args.inputfile) as f:
    with open(args.inputfile, encoding='utf-8') as f:
    info_dict = json.load(f)

    comment_data = {c['id']: c for c in sorted(
  6. @pukkandan pukkandan revised this gist Jul 6, 2022. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions ytdlp_nest_comments.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,5 @@
    #!/usr/bin/env python3

    """
    SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
    Copyright © 2021 [email protected]
  7. @pukkandan pukkandan renamed this gist Dec 26, 2021. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  8. @pukkandan pukkandan created this gist Dec 26, 2021.
    75 changes: 75 additions & 0 deletions nest-comments.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,75 @@
    """
    SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
    Copyright © 2021 [email protected]
    * Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
    * Change FIELDS according to your needs
    The output file will be in the format:
    [{
    'text': 'comment 1',
    ...
    'replies': [{
    'text': 'reply 1',
    ...
    'replies': [...],
    }, ...],
    }, ...]
    """


    import argparse
    import json
    from datetime import datetime, timezone


    def get_fields(dct):
        """Lazily yield ``(name, value)`` for every FIELDS extractor that returns non-None.

        Extractors are called as ``extractor(dct, name)`` in FIELDS order.
        """
        pairs = ((key, extract(dct, key)) for key, extract in FIELDS.items())
        yield from ((key, value) for key, value in pairs if value is not None)

    def filter_func(comments):
        """Return a list with one dict per comment, built from get_fields(comment)."""
        return list(map(dict, map(get_fields, comments)))


    def _format_timestamp(dct, name):
        """Return ``dct[name]`` (a POSIX timestamp) as a UTC ``YYYY/MM/DD`` string.

        Returns None when the field is missing or None, so the field is
        omitted from the output.
        """
        ts = dct.get(name)
        if ts is None:
            return None
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; an aware
        # UTC datetime produces the same calendar date. The explicit None check
        # also formats a timestamp of 0 instead of leaking the raw 0 through.
        return datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y/%m/%d')


    # Maps each output field name to an extractor called as ``extractor(dct, name)``.
    # A field whose extractor returns None is dropped by get_fields.
    FIELDS = {
        'text': dict.get,
        'author': dict.get,
        'timestamp': _format_timestamp,
        # Recurse into replies; an empty or missing list becomes None so the
        # key is omitted entirely.
        'replies': lambda dct, name: filter_func(dct.get(name) or []) or None,
    }


    # Command-line interface: both options are mandatory and take a file path.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input-file', '-i', dest='inputfile',
        metavar='FILE', required=True, help='File to read info_dict from',
    )
    parser.add_argument(
        '--output-file', '-o', dest='outputfile',
        metavar='FILE', required=True, help='File to write comments to',
    )
    args = parser.parse_args()

    print('Reading file')
    # Decode explicitly as UTF-8 instead of the platform's locale encoding;
    # the output file below is written as UTF-8 as well.
    with open(args.inputfile, encoding='utf-8') as f:
        info_dict = json.load(f)

    # Index comments by id, in timestamp order (missing timestamps sort first),
    # so replies can look up their parent directly.
    comment_data = {c['id']: c for c in sorted(
        info_dict['comments'], key=lambda c: c.get('timestamp') or 0)}
    count = len(info_dict['comments'])
    del info_dict  # the raw metadata is no longer needed once comments are indexed
    nested_comments = []
    for i, c in enumerate(comment_data.values(), 1):
        print(f'Processing comment {i}/{count}', end='\r')
        parent_id = c.get('parent', 'root')
        parent = None if parent_id == 'root' else comment_data.get(parent_id)
        # A reply whose parent is not in the dump is kept as a top-level
        # comment instead of aborting the run with a KeyError.
        siblings = nested_comments if parent is None else parent.setdefault('replies', [])
        siblings.append(c)

    print('\nWriting file')
    with open(args.outputfile, 'w', encoding='utf-8') as f:
        json.dump(filter_func(nested_comments), f, indent=4, ensure_ascii=False)

    print('Done')