Skip to content

Instantly share code, notes, and snippets.

@swenson
Created November 4, 2023 23:27
Show Gist options
  • Select an option

  • Save swenson/efa268aa5d0d1b0c5e2db32e3e65a771 to your computer and use it in GitHub Desktop.

Select an option

Save swenson/efa268aa5d0d1b0c5e2db32e3e65a771 to your computer and use it in GitHub Desktop.

Revisions

  1. swenson created this gist Nov 4, 2023.
    12 changes: 12 additions & 0 deletions make-subtitles.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,12 @@
    #!/bin/bash
    # Generate English subtitles for a single video file.
    #
    # Usage: ./make-subtitles.sh VIDEO
    #
    # Steps:
    #   1. ffmpeg extracts the audio as 16 kHz, mono, 16-bit PCM WAV.
    #   2. ./main transcribes the WAV with models/ggml-base.en.bin and writes
    #      an SRT file next to it (<wav path>.srt).
    #   3. The SRT is moved beside the video as <video without extension>.en.srt.
    #
    # Must be run from the directory that contains ./main and models/
    # (presumably a whisper.cpp checkout -- confirm).
    set -euxo pipefail

    if [[ $# -lt 1 ]]; then
      echo "usage: $0 VIDEO" >&2
      exit 2
    fi

    # Work in a private scratch directory so concurrent runs cannot clobber each
    # other's temp.wav, and so the intermediate audio is removed on every exit
    # path (including failures triggered by `set -e`).
    workdir="$(mktemp -d)"
    trap 'rm -rf "$workdir"' EXIT
    wav="$workdir/temp.wav"

    echo "Converting audio"
    # -nostdin: keep ffmpeg from reading the caller's stdin when this script is
    # driven by another program.
    ffmpeg -nostdin -i "$1" -ar 16000 -ac 1 -c:a pcm_s16le "$wav"
    echo "Transcribing"
    ./main -m models/ggml-base.en.bin -f "$wav" --output-srt -t 8 -ml 42
    mv "$wav.srt" "${1%.*}.en.srt"
    # optional: rewrite the original file to include subtitles
    # (reads the .en.srt written above; the temporary .srt no longer exists here)
    #echo "Adding to video file"
    #ffmpeg -nostdin -i "$1" -i "${1%.*}.en.srt" -c copy -metadata:s:s:0 language=eng "${1%.*}.subtitled.mkv"
    36 changes: 36 additions & 0 deletions transcribe-missing.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,36 @@
    #!/usr/bin/env python3

    import json
    import os.path
    import os
    import subprocess as sp
    import sys
    import time


    def has_subtitles(fname) -> bool:
        """Return True if the video at *fname* already has subtitles.

        A video counts as subtitled when either:

        * a sidecar file exists next to it, named like the video with its
          extension replaced by one of the suffixes in ``sidecar_suffixes``; or
        * ffprobe reports at least one stream whose ``codec_type`` is
          ``'subtitle'``.

        The sidecar check runs first, so ffprobe is only invoked when no
        sidecar file is found.

        Args:
            fname: Path to the video file to inspect.

        Returns:
            True if a sidecar file or an embedded subtitle stream was found,
            False otherwise.

        Raises:
            subprocess.CalledProcessError: If ffprobe exits with a non-zero
                status.
            OSError: If the ffprobe executable cannot be started.
            ValueError: If ffprobe's output is not valid JSON.
        """
        base, _ = os.path.splitext(fname)
        sidecar_suffixes = (
            '.srt',
            '.en.srt',
            '.english.srt',
            '.stt',
            '.en.stt',
            '.english.stt',
            '.sub',
        )
        if any(os.path.exists(base + suffix) for suffix in sidecar_suffixes):
            return True

        # Keep stderr out of the captured bytes: anything ffprobe writes there
        # would otherwise be mixed into the JSON document and break parsing.
        out = sp.check_output(
            ['ffprobe', '-v', 'quiet', '-print_format', 'json',
             '-show_format', '-show_streams', fname],
            stderr=sp.DEVNULL,
        )
        # Use .get() so a report without a "streams" list, or a stream entry
        # without "codec_type", reads as "no subtitles" instead of KeyError.
        streams = json.loads(out).get('streams', [])
        return any(stream.get('codec_type') == 'subtitle' for stream in streams)


    # Video file extensions (lower-case, with leading dot) that get checked.
    check_endings = {'.mkv', '.mpg', '.avi', '.mp4', '.m4v', '.mov'}


    def main() -> None:
        """Walk the directory given as argv[1] and subtitle every bare video.

        For each file whose extension (compared case-insensitively) is in
        ``check_endings`` and for which ``has_subtitles`` returns False, the
        path is printed and ``./make-subtitles.sh`` is run on it. The helper
        script is resolved relative to the current working directory.

        Raises:
            subprocess.CalledProcessError: If ``./make-subtitles.sh`` exits
                with a non-zero status; the walk stops at that file.
        """
        if len(sys.argv) < 2:
            print(f'usage: {sys.argv[0]} DIRECTORY', file=sys.stderr)
            sys.exit(2)

        for dirpath, _dirnames, fnames in os.walk(sys.argv[1]):
            for name in fnames:
                _, ext = os.path.splitext(name)
                # Lower-case the extension so files such as "MOVIE.MKV" are
                # not silently skipped.
                if ext.lower() not in check_endings:
                    continue
                fname = os.path.join(dirpath, name)
                if has_subtitles(fname):
                    continue
                # Flush so the file name appears before the child's output
                # when stdout is a pipe or a file.
                print(fname, flush=True)
                sp.check_call(['./make-subtitles.sh', fname])


    if __name__ == '__main__':
        main()