rasbt · March 20, 2025 13:55 · Nov 30, 2022 · Sep 25, 2022
diff --git a/video-subtitles-via-whisper.py b/video-subtitles-via-whisper.py
@@ -8,7 +8,7 @@
 # macOS & homebrew
 #   brew install ffmpeg
 # Ubuntu
-#   sudo apt-get ffmpeg
+#   sudo apt-get install ffmpeg
 
 # Install whisper
 # from repo https://github.com/openai/whisper

diff --git a/video-subtitles-via-whisper.py b/video-subtitles-via-whisper.py
@@ -0,0 +1,63 @@
+# Sebastian Raschka 09/24/2022
+# Create a new conda environment and packages
+#   conda create -n whisper python=3.9
+#   conda activate whisper
+#   conda install mlxtend -c conda-forge
+
+# Install ffmpeg
+# macOS & homebrew
+#   brew install ffmpeg
+# Ubuntu
+#   sudo apt-get ffmpeg
+
+# Install whisper
+# from repo https://github.com/openai/whisper
+#   pip install git+https://github.com/openai/whisper.git
+
+import os
+import os.path as osp
+import subprocess
+
+from mlxtend.file_io import find_files
+from mlxtend.utils import Counter
+
+
+# Find the videos to process: search the current directory recursively for
+# files matching the ".mp4" substring.
+all_videos = find_files(substring=".mp4", path="./", recursive=True)
+if all_videos:
+    # Only show an example when the search found something; indexing an
+    # empty result would raise IndexError.
+    print("Example path:", all_videos[0])
+print("Number of videos to process:", len(all_videos))
+
+audio_outdir = "./extracted_audio"
+subtitle_outdir = "./generated_subtitles"
+
+for this_dir in (audio_outdir, subtitle_outdir):
+    # exist_ok=True makes re-runs safe without a separate existence check.
+    os.makedirs(this_dir, exist_ok=True)
+
+cnt = Counter()
+for v in all_videos:
+
+    base, _ = osp.splitext(v)
+    # Audio files are named after the video's base name only, so videos in
+    # different folders that share a file name map to the same output file.
+    aac_file_out = osp.join(audio_outdir, osp.basename(base)) + ".aac"
+
+    # Extract the audio track from the video without re-encoding it.
+    # Arguments are passed as a list (no shell), so paths containing spaces
+    # or shell metacharacters reach ffmpeg unchanged.
+    subprocess.run(["ffmpeg", "-i", v, "-vn", "-acodec", "copy", aac_file_out])
+
+    if osp.exists(aac_file_out):
+        # Transcribe the extracted audio into the subtitle directory.
+        whisper_cmd = ["whisper", aac_file_out, "--model", "medium"]
+        whisper_cmd += ["--language", "English", "--output_dir", subtitle_outdir]
+        whisper_cmd += ["--verbose", "False"]
+        subprocess.run(whisper_cmd)
+    else:
+        # ffmpeg produced no audio file, so there is nothing to transcribe.
+        print("No audio extracted, skipping transcription:", v)
+    cnt.update()