Skip to content

Instantly share code, notes, and snippets.

@nglehuy
Created May 13, 2025 14:27
Show Gist options
  • Save nglehuy/a3ef6a7bc34d2d0f19d94a82a3313477 to your computer and use it in GitHub Desktop.
Save nglehuy/a3ef6a7bc34d2d0f19d94a82a3313477 to your computer and use it in GitHub Desktop.
Extract subtitles using OpenAI Whisper, add subtitles to video
import math
import os
from faster_whisper import WhisperModel
import ffmpeg
import re
import fire
# Request unbuffered Python output through the PYTHONUNBUFFERED variable.
# NOTE(review): the interpreter presumably reads this variable at start-up, so
# assigning it here likely only affects child processes that inherit the
# environment — confirm this is the intent.
os.environ["PYTHONUNBUFFERED"] = "1"
def extract_audio(input_video, input_video_name):
    """Extract the audio of a video into ``audio-<input_video_name>.wav``.

    The output path is relative, so the file is created in the current
    working directory. If a file with that name already exists it is reused
    and ffmpeg is not invoked.

    Args:
        input_video: Path of the video passed to ffmpeg as input.
        input_video_name: Name used to build the WAV file name.

    Returns:
        The path of the WAV file.
    """
    wav_path = f"audio-{input_video_name}.wav"
    if not os.path.exists(wav_path):
        ffmpeg.input(input_video).output(wav_path).run(overwrite_output=True)
    return wav_path
def transcribe(audio):
    """Transcribe an audio file with the ``"medium"`` Whisper model.

    Args:
        audio: Audio input forwarded unchanged to ``WhisperModel.transcribe``.

    Returns:
        A ``(language, segments)`` tuple, where ``language`` is
        ``info.language`` as reported by the model and ``segments`` is the
        segments object returned by ``transcribe``.
        NOTE(review): ``segments`` is presumably a lazy iterable that performs
        the actual decoding while it is consumed — confirm against the
        faster-whisper documentation.
    """
    whisper = WhisperModel("medium")
    # The transcription language is pinned to "vi" and progress logging is on.
    segments, info = whisper.transcribe(
        audio,
        language="vi",
        log_progress=True,
        multilingual=True,
    )
    print("Transcription language", info.language)
    return info.language, segments
def format_time(seconds):
    """Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The zero-padded timestamp, e.g. ``5.5`` -> ``"00:00:05,500"``.
    """
    # Round once on the total millisecond count so a fraction that rounds up
    # to 1000 ms carries into the seconds field instead of printing ",1000".
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    # Every field is zero-padded: two digits for h/m/s, three for milliseconds.
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
def generate_subtitle_file(language, segments, input_video_name):
    """Write transcription segments to an SRT subtitle file.

    The file is named ``sub2-<input_video_name>.<language>.srt`` and is
    created relative to the current working directory. Cues are numbered from
    1 in iteration order.

    Args:
        language: Language tag embedded in the file name.
        segments: Iterable of objects exposing ``start``, ``end`` (seconds)
            and ``text`` attributes.
        input_video_name: Name embedded in the file name.

    Returns:
        The path of the written SRT file.
    """
    subtitle_file = f"sub2-{input_video_name}.{language}.srt"
    with open(subtitle_file, "w", encoding="utf-8") as f:
        for index, segment in enumerate(segments, start=1):
            segment_start = format_time(segment.start)
            segment_end = format_time(segment.end)
            # No trailing spaces on any line: a trailing space on the timing
            # line would otherwise end up inside the parsed end timestamp.
            f.write(
                f"{index}\n"
                f"{segment_start} --> {segment_end}\n"
                f"{segment.text.strip()}\n"
                "\n"
            )
            # NOTE(review): flushing per cue presumably keeps partial results
            # on disk while a lazily evaluated `segments` is still being
            # consumed — confirm before removing.
            f.flush()
    return subtitle_file
def add_subtitle_to_video(input_video, subtitle_file):
    """Render a video with the ``subtitles`` video filter applied.

    The result is written to ``output-<video base name>.mp4`` relative to the
    current working directory, overwriting any existing file.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the subtitle file passed to the filter.
            NOTE(review): the path is interpolated into the filter string
            unescaped; paths containing characters that are special in
            ffmpeg filter syntax may need escaping — confirm.
    """
    base_name, _ext = os.path.splitext(os.path.basename(input_video))
    output_video = f"output-{base_name}.mp4"
    (
        ffmpeg.input(input_video)
        .output(output_video, vf=f"subtitles={subtitle_file}")
        .run(overwrite_output=True)
    )
def extract_subtitles(
    input_video: str,
):
    """Run the full pipeline: video -> audio -> transcription -> SRT file.

    The video's base name (without extension) is used to name both the
    intermediate audio file and the resulting subtitle file.

    Args:
        input_video: Path of the video to transcribe.
    """
    stem, _ext = os.path.splitext(os.path.basename(input_video))
    wav_path = extract_audio(input_video, stem)
    detected_language, segments = transcribe(audio=wav_path)
    generate_subtitle_file(
        language=detected_language,
        segments=segments,
        input_video_name=stem,
    )
def srt_to_dict(srt_file):
    """Parse an SRT file into a list of cue dictionaries.

    Blocks with fewer than three lines (number, timing, text) are skipped.
    Multi-line cue text is joined into a single line with spaces. As a
    diagnostic, the number of parsed cues and the set of cue numbers missing
    from ``1..max`` are printed.

    Args:
        srt_file: Path of the SRT file to read (UTF-8, with or without BOM).

    Returns:
        A list of ``{"index": int, "start": str, "end": str, "text": str}``
        dictionaries in file order; empty when the file holds no valid cue.

    Raises:
        ValueError: If a cue number is not an integer or a timing line does
            not contain exactly one ``-->`` separator.
    """
    # "utf-8-sig" transparently drops a leading byte-order mark, which would
    # otherwise make int() fail on the first cue number.
    with open(srt_file, "r", encoding="utf-8-sig") as file:
        raw = file.read().strip()

    subtitles = []
    indexes = set()
    # Cues are separated by one or more blank (or whitespace-only) lines.
    for subtitle_block in re.split(r"\n\s*\n", raw):
        lines = subtitle_block.split("\n")
        if len(lines) < 3:
            continue
        index = int(lines[0].strip())
        # Strip both timestamps so trailing whitespace never leaks into them.
        start, end = (part.strip() for part in lines[1].split("-->"))
        text = " ".join(line.strip() for line in lines[2:]).strip()
        subtitles.append({"index": index, "start": start, "end": end, "text": text})
        indexes.add(index)

    # default=0 keeps the diagnostic working when no cue was parsed at all.
    missing = set(range(1, max(indexes, default=0) + 1)) - indexes
    print(len(subtitles), missing)
    return subtitles
def reindex_srt_file(srt_file, output_srt_file):
    """Rewrite an SRT file with cues renumbered consecutively from 1.

    Cues are read with ``srt_to_dict`` and written back in the same order;
    only the cue numbers change.

    Args:
        srt_file: Path of the SRT file to read.
        output_srt_file: Path of the renumbered SRT file to write.
    """
    entries = srt_to_dict(srt_file)
    with open(output_srt_file, "w", encoding="utf-8") as out:
        for number, entry in enumerate(entries, start=1):
            entry["index"] = number
            out.writelines(
                (
                    f"{entry['index']}\n",
                    f"{entry['start']} --> {entry['end']}\n",
                    f"{entry['text']}\n\n",
                )
            )
def convert_srt_to_ass(input_srt, output_ass):
    """Convert a subtitle file by running it through ffmpeg.

    Args:
        input_srt: Path of the subtitle file given to ffmpeg as input.
        output_ass: Path of the file ffmpeg writes.
            NOTE(review): the output format is presumably chosen by ffmpeg
            from this path's extension — confirm.
    """
    source = ffmpeg.input(input_srt)
    conversion = source.output(output_ass)
    conversion.run()
if __name__ == "__main__":
    # Command-line entry point: each key is a sub-command name handed to
    # python-fire, each value the function it invokes.
    commands = {
        "extract_subtitles": extract_subtitles,
        "add_subtitle_to_video": add_subtitle_to_video,
        "srt_to_dict": srt_to_dict,
        "reindex_srt_file": reindex_srt_file,
        "convert_srt_to_ass": convert_srt_to_ass,
    }
    fire.Fire(commands)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment