Created
March 4, 2024 02:31
-
-
Save jkerhin/07cc4aee8e2f913c19eff1e40e3ae1af to your computer and use it in GitHub Desktop.
Quick wrapper script around whisper.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Quick wrapper around whisper.cpp to allow transcription of arbitrary media | |
| Automatically create .wav file using FFMPEG, then transcribe it using the | |
| (built separately) whisper.cpp binary with most output formats. | |
| """ | |
import os
import subprocess
import sys
from pathlib import Path
# Root of the whisper.cpp checkout: the `main` binary and the `models/`
# directory are looked up underneath it. Set the WHISPER_CPP_DIR environment
# variable to point at a different checkout; the historical hard-coded
# location remains the default.
WHISPER_DIR = Path(os.environ.get("WHISPER_CPP_DIR", "/home/joe/whisper.cpp"))
def transcribe_media(media_pth: Path) -> None:
    """Convert media audio to 16k mono wav, transcribe with whisper.cpp.

    Both external tools are run with the media file's directory as their
    working directory and are given bare file names, so the intermediate
    ``.wav`` file is created next to the source media. Transcript output is
    requested in txt, vtt, srt and csv form.

    Args:
        media_pth: Path to the media file to transcribe.

    Raises:
        RuntimeError: If ffmpeg or the whisper.cpp binary exits with a
            non-zero return code.
    """
    work_dir = media_pth.parent
    in_file = media_pth.name
    out_file = f"{media_pth.stem}.wav"
    if out_file == in_file:
        # The input is already named "<stem>.wav". ffmpeg cannot write its
        # output over the file it is reading, so convert into a distinct
        # name instead of failing on the collision.
        out_file = f"{media_pth.stem}.16k.wav"

    proc = subprocess.run(
        [
            "ffmpeg",
            "-i",
            in_file,
            # Resample to 16 kHz, downmix to one channel, 16-bit PCM.
            "-ar",
            "16000",
            "-ac",
            "1",
            "-c:a",
            "pcm_s16le",
            out_file,
        ],
        cwd=work_dir,
    )
    if proc.returncode:
        raise RuntimeError(f"Bad exit from ffmpeg! {proc}")

    proc = subprocess.run(
        [
            f"{WHISPER_DIR}/main",
            "-m",
            f"{WHISPER_DIR}/models/ggml-medium.en.bin",
            # One output flag per transcript format.
            "-otxt",
            "-ovtt",
            "-osrt",
            "-ocsv",
            # Console display options (progress / colors per whisper.cpp's
            # CLI help -- confirm against the built binary's --help).
            "-pp",
            "-pc",
            "-f",
            out_file,
        ],
        cwd=work_dir,
    )
    if proc.returncode:
        raise RuntimeError(f"Bad exit from whisper.cpp! {proc}")
def main() -> None:
    """Transcribe every media file named on the command line.

    Each argument after the program name is treated as a path. Arguments
    that are not existing regular files are reported and skipped; the rest
    are resolved to absolute paths and passed to ``transcribe_media``.

    Raises:
        SystemExit: With status 2 when no file arguments were given, after
            printing the usage line to stderr, so that calling scripts can
            detect the misuse.
    """
    if len(sys.argv) < 2:
        print(f"{sys.argv[0]} FILENAME [FILENAME]", file=sys.stderr)
        sys.exit(2)

    for arg in sys.argv[1:]:
        pth = Path(arg)
        if not pth.is_file():
            print(f"Skipping {arg} - not a file")
            continue
        # Resolve first so the parent directory used as the subprocess
        # working directory is absolute.
        transcribe_media(pth.resolve())


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment