Skip to content

Instantly share code, notes, and snippets.

@nglehuy
Created May 13, 2025 14:27
Show Gist options
  • Save nglehuy/a3ef6a7bc34d2d0f19d94a82a3313477 to your computer and use it in GitHub Desktop.
Save nglehuy/a3ef6a7bc34d2d0f19d94a82a3313477 to your computer and use it in GitHub Desktop.

Revisions

  1. nglehuy renamed this gist May 13, 2025. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. nglehuy created this gist May 13, 2025.
    135 changes: 135 additions & 0 deletions main.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,135 @@
    import math
    import os
    from faster_whisper import WhisperModel
    import ffmpeg
    import re
    import fire

    # Export PYTHONUNBUFFERED=1 into this process's environment.
    # NOTE(review): CPython reads PYTHONUNBUFFERED at interpreter start-up, so
    # setting it here presumably only affects child processes that inherit the
    # environment, not this script's own stdout buffering -- confirm the intent.
    os.environ["PYTHONUNBUFFERED"] = "1"


    def extract_audio(input_video, input_video_name):
        """Extract the audio track of a video into ``audio-<name>.wav``.

        Args:
            input_video: Path of the source video handed to ffmpeg.
            input_video_name: Name used to build the output file name.

        Returns:
            The relative path of the WAV file. An already existing file with
            that name is reused and ffmpeg is not invoked.
        """
        wav_path = f"audio-{input_video_name}.wav"
        # Only run ffmpeg when no earlier run left the WAV file behind.
        if not os.path.exists(wav_path):
            ffmpeg.input(input_video).output(wav_path).run(overwrite_output=True)
        return wav_path


    def transcribe(audio, model_size="medium", language="vi"):
        """Transcribe an audio file with faster-whisper.

        Args:
            audio: Audio input passed straight to ``WhisperModel.transcribe``.
            model_size: Model name given to ``WhisperModel``. Defaults to
                ``"medium"``, the value that used to be hard-coded.
            language: Language code passed to the model. Defaults to ``"vi"``,
                the value that used to be hard-coded.

        Returns:
            A ``(language, segments)`` tuple: the language reported in the
            transcription info and the segments object returned by the model.
        """
        model = WhisperModel(model_size)
        segments, info = model.transcribe(
            audio, language=language, log_progress=True, multilingual=True
        )
        print("Transcription language", info.language)
        # NOTE(review): faster-whisper documents `segments` as a lazy generator,
        # so the actual decoding presumably happens while the caller iterates
        # it -- confirm against the installed version.
        return info.language, segments


    def format_time(seconds):
        """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

        Args:
            seconds: Offset from the start of the media, in seconds (int or
                float). Negative values are clamped to zero because SRT has no
                notation for them.

        Returns:
            The timestamp string with zero-padded two-digit hours, minutes and
            seconds and three-digit milliseconds.
        """
        # Round once on the whole value: rounding only the fractional part can
        # produce 1000 ms (e.g. 1.9996 s) without carrying into the seconds.
        total_ms = max(0, int(round(seconds * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        whole_seconds, milliseconds = divmod(remainder, 1000)
        # Seconds must be two digits wide; the previous ":01d" emitted "0:00:5".
        return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"


    def generate_subtitle_file(language, segments, input_video_name):
        """Write transcription segments to ``sub2-<name>.<language>.srt``.

        Each segment becomes one SRT cue: a 1-based index line, a
        ``start --> end`` line built with ``format_time``, the cue text, and a
        blank separator line. Lines are written without trailing spaces and the
        cue text is stripped of surrounding whitespace.

        Args:
            language: Language code embedded in the output file name.
            segments: Iterable of objects exposing ``start``, ``end`` and
                ``text`` attributes.
            input_video_name: Name embedded in the output file name.

        Returns:
            The relative path of the subtitle file that was written.
        """
        subtitle_file = f"sub2-{input_video_name}.{language}.srt"

        with open(subtitle_file, "w", encoding="utf-8") as f:
            for index, segment in enumerate(segments, start=1):
                segment_start = format_time(segment.start)
                segment_end = format_time(segment.end)
                f.write(
                    f"{index}\n"
                    f"{segment_start} --> {segment_end}\n"
                    f"{segment.text.strip()}\n"
                    "\n"
                )
                # Flush per cue so the file on disk grows as segments arrive.
                # NOTE(review): presumably intended for following progress of a
                # slow, lazily produced `segments` iterable -- confirm.
                f.flush()

        return subtitle_file


    def add_subtitle_to_video(input_video, subtitle_file):
        """Re-encode a video with ``subtitles=<subtitle_file>`` as video filter.

        The result is written to ``output-<video stem>.mp4`` in the current
        directory, overwriting any existing file of that name.

        Args:
            input_video: Path of the source video.
            subtitle_file: Path of the subtitle file named in the filter.
        """
        stem = os.path.splitext(os.path.basename(input_video))[0]
        # NOTE(review): the subtitle path is interpolated into the filter string
        # as-is; characters that are special in ffmpeg filter syntax may need
        # escaping -- confirm for the paths used in practice.
        rendered = ffmpeg.input(input_video).output(
            f"output-{stem}.mp4", vf=f"subtitles={subtitle_file}"
        )
        rendered.run(overwrite_output=True)


    def extract_subtitles(
        input_video: str,
    ):
        """Run the whole pipeline for one video and write its SRT file.

        Steps: extract the audio track, transcribe it, then write the segments
        with ``generate_subtitle_file``. Nothing is returned.

        Args:
            input_video: Path of the video to subtitle.
        """
        base_name, _extension = os.path.splitext(os.path.basename(input_video))
        wav_path = extract_audio(input_video, base_name)
        detected_language, cues = transcribe(audio=wav_path)
        generate_subtitle_file(
            language=detected_language,
            segments=cues,
            input_video_name=base_name,
        )


    def srt_to_dict(srt_file):
        """Parse an SRT file into a list of cue dictionaries.

        Cues are separated by one or more blank (or whitespace-only) lines.
        Blocks with fewer than three lines are skipped. After parsing, the cue
        count and the set of indexes missing from ``1..last index`` are printed
        as a quick consistency report.

        Args:
            srt_file: Path of the SRT file to read (UTF-8, with or without BOM).

        Returns:
            A list of ``{"index", "start", "end", "text"}`` dictionaries in
            file order; an empty list when the file contains no cue.

        Raises:
            ValueError: If an index line is not an integer or a timing line
                does not contain exactly one ``-->`` separator.
        """
        # "utf-8-sig" drops a leading byte-order mark, which would otherwise
        # make int() reject the first index line.
        with open(srt_file, "r", encoding="utf-8-sig") as file:
            raw = file.read().strip()

        subtitles = []
        indexes = set()

        for subtitle_block in re.split(r"\n\s*\n", raw):
            lines = subtitle_block.split("\n")
            if len(lines) < 3:
                continue

            index = int(lines[0].strip())
            # Strip both sides so trailing spaces in the file do not leak into
            # the timestamps.
            start, end = (part.strip() for part in lines[1].split("-->"))
            text = " ".join(lines[2:]).strip()

            subtitles.append(
                {"index": index, "start": start, "end": end, "text": text}
            )
            indexes.add(index)

        # Guard the report: with no cues there is no "last index" to look at.
        if subtitles:
            missing = set(range(1, subtitles[-1]["index"] + 1)) - indexes
        else:
            missing = set()
        print(len(subtitles), missing)
        return subtitles


    def reindex_srt_file(srt_file, output_srt_file):
        """Rewrite an SRT file with cue indexes renumbered from 1.

        The input is parsed completely with ``srt_to_dict`` before the output
        file is opened; timings and text are written back as parsed.

        Args:
            srt_file: Path of the SRT file to read.
            output_srt_file: Path of the renumbered SRT file to write.
        """
        entries = srt_to_dict(srt_file)

        with open(output_srt_file, "w", encoding="utf-8") as out:
            for position, entry in enumerate(entries, start=1):
                entry["index"] = position
                out.write(
                    f"{entry['index']}\n"
                    f"{entry['start']} --> {entry['end']}\n"
                    f"{entry['text']}\n\n"
                )


    def convert_srt_to_ass(input_srt, output_ass):
        """Run ffmpeg with ``input_srt`` as input and ``output_ass`` as output.

        Args:
            input_srt: Path of the subtitle file to read.
            output_ass: Path of the file ffmpeg should write.
        """
        source = ffmpeg.input(input_srt)
        conversion = ffmpeg.output(source, output_ass)
        ffmpeg.run(conversion)


    if __name__ == "__main__":
        # Expose the helpers as named sub-commands through python-fire.
        fire.Fire(
            dict(
                extract_subtitles=extract_subtitles,
                add_subtitle_to_video=add_subtitle_to_video,
                srt_to_dict=srt_to_dict,
                reindex_srt_file=reindex_srt_file,
                convert_srt_to_ass=convert_srt_to_ass,
            )
        )