rasbt · January 26, 2023 17:46 · Jan 26, 2023
diff --git a/whisper-audio-to-text.py b/whisper-audio-to-text.py
@@ -0,0 +1,65 @@
+# Setup:
+# conda create -n whisper python=3.9
+# conda activate whisper
+# https://github.com/openai/whisper
+# pip install git+https://github.com/openai/whisper.git
+
+# Usage:
+# python whisper-audio-to-text.py --audio_dir my_files --out_dir texts
+# Runs the `whisper` CLI once per .aac/.mp3 file found in --audio_dir.
+
+import argparse
+import os
+import os.path as osp
+import shlex
+import subprocess
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--audio_dir", help="Path to the folder containing the input files.", type=str, required=True
+)
+parser.add_argument(
+    "--out_dir", help="Path to the target folder for the transcripts.", type=str, required=True
+)
+parser.add_argument(
+    "--whisper_model", type=str, choices=("small", "medium", "large"), default="medium"
+)
+parser.add_argument(
+    "--print_only", type=str, default="false",
+    help="Only prints the files to be processed instead "
+         "of actually processing them", choices=("true", "false")
+)
+
+args = parser.parse_args()
+# argparse restricts --print_only to "true"/"false"; store it as a real bool.
+args.print_only = args.print_only == "true"
+
+# Match extensions case-insensitively (e.g. ".MP3") and sort the names, since
+# os.listdir() returns entries in arbitrary order.
+files_to_process = sorted(
+    f for f in os.listdir(args.audio_dir) if f.lower().endswith(('.aac', '.mp3'))
+)
+
+print(f"Processing {len(files_to_process)} files:")
+print("\n".join(files_to_process))
+input("Press Enter to continue / CTRL-C to cancel.")
+print(20*'-')
+
+os.makedirs(args.out_dir, exist_ok=True)
+
+for file in files_to_process:
+    print('Processing:', file)
+
+    # Pass argv as a list: splitting a command string on whitespace would
+    # break any directory or file name that contains spaces.
+    cmd = ["whisper", osp.join(args.audio_dir, file), "--model", args.whisper_model,
+           "--language", "English", "--output_dir", args.out_dir, "--verbose", "True",
+           "--task", "transcribe", "--output_format", "txt"]
+
+    if args.print_only:
+        print(shlex.join(cmd))  # shell-quoted, so the output can be copy-pasted
+    else:
+        subprocess.run(cmd, check=True)
+
+    print('Finished processing', file)
+    print(20*'-')