Created
May 13, 2025 14:27
-
-
Save nglehuy/a3ef6a7bc34d2d0f19d94a82a3313477 to your computer and use it in GitHub Desktop.
Revisions
-
nglehuy renamed this gist
May 13, 2025. 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
nglehuy created this gist
May 13, 2025. There are no files selected for viewing
"""Generate, burn in, and tidy SRT subtitles using faster-whisper and ffmpeg.

The third-party packages (faster_whisper, ffmpeg, fire) are imported inside
the functions that use them, so the pure SRT helpers (``format_time``,
``srt_to_dict``, ``reindex_srt_file``) can be used without them installed.
"""

import math
import os
import re

os.environ["PYTHONUNBUFFERED"] = "1"


def extract_audio(input_video, input_video_name):
    """Extract the audio of *input_video* into ``audio-<input_video_name>.wav``.

    The file is written relative to the current working directory. If it
    already exists it is reused and ffmpeg is not invoked.

    Returns:
        The path of the WAV file.
    """
    extracted_audio = f"audio-{input_video_name}.wav"
    if os.path.exists(extracted_audio):
        return extracted_audio

    import ffmpeg

    stream = ffmpeg.input(input_video)
    stream = ffmpeg.output(stream, extracted_audio)
    ffmpeg.run(stream, overwrite_output=True)
    return extracted_audio


def transcribe(audio, model_size="medium", language="vi"):
    """Transcribe *audio* with a faster-whisper model.

    Args:
        audio: Path of the audio file handed to ``WhisperModel.transcribe``.
        model_size: Model name passed to ``WhisperModel``.
        language: Language code passed to ``WhisperModel.transcribe``.

    Returns:
        A ``(language, segments)`` tuple, where ``language`` is the language
        reported by the model and ``segments`` is the segment iterable
        returned by ``WhisperModel.transcribe``.
    """
    from faster_whisper import WhisperModel

    model = WhisperModel(model_size)
    segments, info = model.transcribe(
        audio, language=language, log_progress=True, multilingual=True
    )
    print("Transcription language", info.language)
    return info.language, segments


def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* being split into
    fields, so a fraction such as 59.9996 carries into the seconds/minutes
    instead of producing a four-digit millisecond field. Negative inputs are
    clamped to zero.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"


def generate_subtitle_file(language, segments, input_video_name):
    """Write *segments* to ``sub2-<input_video_name>.<language>.srt``.

    Each segment must expose ``start``, ``end`` (seconds) and ``text``.
    Cues are numbered from 1 and written without trailing whitespace.

    Returns:
        The path of the subtitle file.
    """
    subtitle_file = f"sub2-{input_video_name}.{language}.srt"
    with open(subtitle_file, "w", encoding="utf-8") as f:
        for index, segment in enumerate(segments, start=1):
            start = format_time(segment.start)
            end = format_time(segment.end)
            f.write(f"{index}\n{start} --> {end}\n{segment.text.strip()}\n\n")
            # Flush per cue so partial output is on disk while the remaining
            # segments are still being produced.
            f.flush()
    return subtitle_file


def add_subtitle_to_video(input_video, subtitle_file):
    """Burn *subtitle_file* into *input_video*.

    The result is written to ``output-<video base name>.mp4`` in the current
    working directory, overwriting any existing file.
    """
    import ffmpeg

    input_video_name = os.path.splitext(os.path.basename(input_video))[0]
    video_input_stream = ffmpeg.input(input_video)
    output_video = f"output-{input_video_name}.mp4"
    # NOTE(review): the subtitle path is interpolated into the filter string
    # unescaped; paths containing filter metacharacters (':', ',', quotes)
    # will likely need escaping -- confirm against the ffmpeg filter syntax.
    stream = ffmpeg.output(
        video_input_stream, output_video, vf=f"subtitles={subtitle_file}"
    )
    ffmpeg.run(stream, overwrite_output=True)


def extract_subtitles(
    input_video: str,
    model_size: str = "medium",
    language: str = "vi",
):
    """Extract audio from *input_video*, transcribe it, and write an SRT file.

    Args:
        input_video: Path of the video to process.
        model_size: Forwarded to ``transcribe``.
        language: Forwarded to ``transcribe``.
    """
    input_video_name = os.path.splitext(os.path.basename(input_video))[0]
    audio = extract_audio(input_video, input_video_name)
    detected_language, segments = transcribe(
        audio=audio, model_size=model_size, language=language
    )
    generate_subtitle_file(
        language=detected_language,
        segments=segments,
        input_video_name=input_video_name,
    )


def srt_to_dict(srt_file):
    """Parse *srt_file* into a list of cue dictionaries.

    Each cue is ``{"index": int, "start": str, "end": str, "text": str}``.
    Multi-line cue text is joined with single spaces. Blocks with fewer than
    three lines are skipped. A summary line (cue count and the set of indexes
    missing from ``1..last index``) is printed.

    Returns:
        The list of cues in file order; empty if the file has no valid cue.

    Raises:
        ValueError: If a cue's index is not an integer or its timing line
            does not contain exactly one ``-->`` separator.
    """
    # "utf-8-sig" transparently drops a leading BOM, which would otherwise
    # make int() fail on the first cue index.
    with open(srt_file, "r", encoding="utf-8-sig") as file:
        raw = file.read().strip()
    blocks = re.sub(r"\n{3,}", "\n\n", raw).split("\n\n")

    subtitles = []
    indexes = set()
    for block in blocks:
        lines = block.split("\n")
        if len(lines) < 3:
            continue
        index = int(lines[0].strip())
        # Strip each side so trailing spaces on the timing line do not leak
        # into the stored timestamps.
        start, end = (part.strip() for part in lines[1].split("-->"))
        text = " ".join(lines[2:]).strip()
        indexes.add(index)
        subtitles.append({"index": index, "start": start, "end": end, "text": text})

    last_index = subtitles[-1]["index"] if subtitles else 0
    print(len(subtitles), set(range(1, last_index + 1)) - indexes)
    return subtitles


def reindex_srt_file(srt_file, output_srt_file):
    """Rewrite *srt_file* to *output_srt_file* with cues renumbered from 1."""
    subtitles = srt_to_dict(srt_file)
    with open(output_srt_file, "w", encoding="utf-8") as file:
        for new_index, subtitle in enumerate(subtitles, start=1):
            subtitle["index"] = new_index
            file.write(f"{subtitle['index']}\n")
            file.write(f"{subtitle['start']} --> {subtitle['end']}\n")
            file.write(f"{subtitle['text']}\n\n")


def convert_srt_to_ass(input_srt, output_ass):
    """Convert *input_srt* to *output_ass* by running it through ffmpeg."""
    import ffmpeg

    ffmpeg.input(input_srt).output(output_ass).run()


if __name__ == "__main__":
    import fire

    fire.Fire(
        {
            "extract_subtitles": extract_subtitles,
            "add_subtitle_to_video": add_subtitle_to_video,
            "srt_to_dict": srt_to_dict,
            "reindex_srt_file": reindex_srt_file,
            "convert_srt_to_ass": convert_srt_to_ass,
        }
    )