Skip to content

Instantly share code, notes, and snippets.

@rasbt
Created January 26, 2023 17:46
Show Gist options
  • Save rasbt/4bfd783328171972d7aaf77e83489c28 to your computer and use it in GitHub Desktop.
Save rasbt/4bfd783328171972d7aaf77e83489c28 to your computer and use it in GitHub Desktop.

Revisions

  1. Sebastian Raschka created this gist Jan 26, 2023.
    65 changes: 65 additions & 0 deletions whisper-audio-to-text.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,65 @@
    # Setup:
    # conda create -n whisper python=3.9
    # conda activate whisper
    # https://github.com/openai/whisper
    # pip install git+https://github.com/openai/whisper.git

    # Usage:
    # python whisper-audio-to-text.py --audio_dir my_files --out_dir texts

    import argparse
    import os
    import os.path as osp
    import shlex
    import subprocess

    # Command-line interface: two required folders plus optional
    # model-size and dry-run switches.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--audio_dir",
        type=str,
        required=True,
        help="Path to the folder containing the input files.",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        required=True,
        help="Path to the target folder for the transcripts.",
    )
    parser.add_argument(
        "--whisper_model",
        type=str,
        default="medium",
        choices=("small", "medium", "large"),
    )
    parser.add_argument(
        "--print_only",
        type=str,
        default="false",
        choices=("true", "false"),
        help="Only prints the files to be processed instead "
        "of actually processing them",
    )

    args = parser.parse_args()
    # argparse has already restricted the value to "true" or "false", so a
    # plain comparison is enough to turn the string into a real boolean.
    args.print_only = args.print_only == "true"

    # Collect the audio files to transcribe. The extension check is
    # case-insensitive so files named e.g. "TALK.MP3" are not silently
    # skipped, and the listing is sorted because os.listdir returns entries
    # in arbitrary order -- sorting keeps the batch order reproducible.
    audio_extensions = ('.aac', '.mp3')
    files_to_process = sorted(
        file for file in os.listdir(args.audio_dir)
        if file.lower().endswith(audio_extensions)
    )

    # Show the batch and let the user abort before any work starts.
    print(f"Processing {len(files_to_process)} files:")
    print("\n".join(files_to_process))
    input("Press Enter to continue / CTRL-C to cancel.")
    print(20*'-')

    # Create the transcript folder, including any missing parents.
    # exist_ok=True replaces the separate existence check, so there is no
    # window between "check" and "create" in which the folder could appear
    # and make the call fail. If the path already exists but is not a
    # directory, this raises immediately instead of continuing.
    os.makedirs(args.out_dir, exist_ok=True)

    # Transcribe the files one at a time by invoking the whisper CLI.
    for file in files_to_process:
        print('Processing:', file)

        # Build the command as an argument list rather than splitting a
        # formatted string on whitespace: a file or folder name containing
        # spaces would otherwise be broken into several bogus arguments.
        cmd = [
            "whisper", osp.join(args.audio_dir, file),
            "--model", args.whisper_model,
            "--language", "English",
            "--output_dir", args.out_dir,
            "--verbose", "True",
            "--task", "transcribe",
            "--output_format", "txt",
        ]

        if args.print_only:
            # Dry run: show the command, shell-quoted only where needed, so
            # the output is unchanged for paths without special characters.
            print(shlex.join(cmd))
        else:
            # check=True raises CalledProcessError when whisper exits with a
            # non-zero status, which stops the batch at the failing file.
            subprocess.run(cmd, check=True)

        print('Finished processing', file)
        print(20*'-')