# Sebastian Raschka 09/24/2022
# Create a new conda environment and install the required packages
#   conda create -n whisper python=3.9
#   conda activate whisper
#   conda install mlxtend -c conda-forge

# Install ffmpeg
# macOS & homebrew
#   brew install ffmpeg
# Ubuntu
#   sudo apt-get install ffmpeg

# Install whisper
# from repo https://github.com/openai/whisper
#   pip install git+https://github.com/openai/whisper.git

import os
import os.path as osp
import subprocess

from mlxtend.file_io import find_files
from mlxtend.utils import Counter


# Search the current directory tree for files matching the ".mp4" substring.
all_videos = find_files(substring=".mp4", path="./", recursive=True)
# Only show a sample path when at least one file was found; indexing an empty
# result would raise IndexError before the count below is reported.
if all_videos:
    print("Example path:", all_videos[0])
print("Number of videos to process:", len(all_videos))

# Destination folders: one for the audio tracks pulled out of the videos,
# one for the subtitle files written by whisper.
audio_outdir = "./extracted_audio"
subtitle_outdir = "./generated_subtitles"

for this_dir in (audio_outdir, subtitle_outdir):
    # exist_ok=True makes re-runs a no-op without a separate existence check,
    # and still raises if the path is occupied by something that is not a
    # directory instead of failing later when files are written into it.
    os.makedirs(this_dir, exist_ok=True)

# For every video: extract its audio track with ffmpeg, then run whisper on
# the extracted audio to generate subtitles.
#
# Commands are passed to subprocess.run as argument lists rather than shell
# strings, so paths containing spaces, quotes or other shell metacharacters
# are handed to the tools verbatim instead of being split or interpreted.
# NOTE: unlike a shell command string, this raises FileNotFoundError right
# away if the ffmpeg or whisper executable is not on PATH.
cnt = Counter()
for v in all_videos:

    # The audio file keeps the video's file name, with ".aac" replacing the
    # extension. NOTE: videos in different folders that share a file name map
    # to the same output path.
    stem = osp.splitext(osp.basename(v))[0]
    aac_file_out = osp.join(audio_outdir, stem + ".aac")

    # extract audio file from video (-vn drops the video stream, the audio
    # stream is copied as-is without re-encoding)
    extraction = subprocess.run(
        ["ffmpeg", "-i", v, "-vn", "-acodec", "copy", aac_file_out]
    )

    if extraction.returncode == 0:
        subprocess.run(
            [
                "whisper",
                aac_file_out,
                "--model",
                "medium",
                "--language",
                "English",
                "--output_dir",
                subtitle_outdir,
                "--verbose",
                "False",
            ]
        )
    else:
        # Without a fresh audio file there is nothing valid to transcribe.
        print(
            f"ffmpeg exited with status {extraction.returncode} for {v}; "
            "skipping transcription"
        )
    cnt.update()