-
-
Save vico/caa9d974c5583208ceb816fbcd1fda1d to your computer and use it in GitHub Desktop.
Revisions
-
Sebastian Raschka created this gist
Sep 25, 2022. There are no files selected for viewing
# Sebastian Raschka 09/24/2022

# Create a new conda environment and packages
# conda create -n whisper python=3.9
# conda activate whisper
# conda install mlxtend -c conda-forge

# Install ffmpeg
# macOS & homebrew
# brew install ffmpeg
# Ubuntu
# sudo apt-get install ffmpeg

# Install whisper
# from repo https://github.com/openai/whisper
# pip install git+https://github.com/openai/whisper.git

"""Batch-generate English subtitles for the videos below the current directory.

For every file whose name contains ".mp4" (searched recursively from "./"),
the script:

1. copies the audio stream into ``./extracted_audio/<video name>.aac`` using
   ffmpeg (``-acodec copy``, i.e. no re-encoding), and
2. runs the ``whisper`` command-line tool (medium model, English) on that
   audio file, writing its output into ``./generated_subtitles``.

Both output directories are created if they do not exist yet.
"""

import os
import os.path as osp
import shutil
import subprocess
import sys

from mlxtend.file_io import find_files
from mlxtend.utils import Counter


# Fail early with a clear message instead of erroring once per video when an
# external tool is not installed.
for tool in ("ffmpeg", "whisper"):
    if shutil.which(tool) is None:
        raise SystemExit(f"Required command not found on PATH: {tool}")

all_videos = find_files(substring=".mp4", path="./", recursive=True)
if not all_videos:
    # Indexing all_videos[0] below would otherwise raise an IndexError.
    raise SystemExit("No .mp4 files found under ./")

print("Example path:", all_videos[0])
print("Number of videos to process:", len(all_videos))

audio_outdir = "./extracted_audio"
subtitle_outdir = "./generated_subtitles"

for this_dir in (audio_outdir, subtitle_outdir):
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(this_dir, exist_ok=True)

cnt = Counter()

for v in all_videos:
    base, ext = osp.splitext(v)
    # NOTE(review): only the base name is kept, so videos in different
    # folders that share a file name would write to the same .aac path.
    aac_file_out = osp.join(audio_outdir, osp.basename(base)) + ".aac"

    # Extract the audio track from the video. Passing the arguments as a list
    # (no shell) keeps paths containing spaces or shell metacharacters intact.
    extraction = subprocess.run(
        ["ffmpeg", "-i", v, "-vn", "-acodec", "copy", aac_file_out]
    )

    if extraction.returncode != 0:
        # Without a usable audio file there is nothing for whisper to read.
        print(
            f"Skipping {v}: ffmpeg exited with status {extraction.returncode}",
            file=sys.stderr,
        )
    else:
        subprocess.run(
            [
                "whisper",
                aac_file_out,
                "--model",
                "medium",
                "--language",
                "English",
                "--output_dir",
                subtitle_outdir,
                "--verbose",
                "False",
            ]
        )

    cnt.update()