-
-
Save nimatrueway/4589700f49c691e5413c5b2df4d02f4f to your computer and use it in GitHub Desktop.
| package main | |
| import ( | |
| "time" | |
| "regexp" | |
| "bufio" | |
| "strconv" | |
| "fmt" | |
| "os" | |
| "errors" | |
| "io" | |
| "strings" | |
| ) | |
// Subtitle is one SRT cue: the index read from the file, the interval during
// which the cue is displayed, and its text (lines joined with "\n").
//
// The field order is significant because the struct is built with a
// positional composite literal elsewhere in this file.
type Subtitle struct {
	idx              int
	fromTime, toTime time.Duration
	text             string
}
// timeFramePattern matches an SRT timing line such as
// "00:03:14,000 --> 00:03:14,159". Submatches 1-4 are the start
// hours/minutes/seconds/milliseconds and submatches 5-8 are the same fields
// for the end time.
//
// MustCompile is used so that a broken pattern fails loudly at start-up
// instead of leaving a nil *regexp.Regexp behind.
var timeFramePattern = regexp.MustCompile(`(\d+):(\d+):(\d+),(\d+) --> (\d+):(\d+):(\d+),(\d+)`)
// getDuration converts four decimal strings (hours, minutes, seconds and
// milliseconds, in that order) into a single time.Duration.
//
// parts must contain at least four elements. Conversion errors are discarded
// and the value strconv.Atoi returned alongside the error is used as is; the
// callers in this file only pass `\d+` captures from timeFramePattern.
func getDuration(parts []string) time.Duration {
	units := [...]time.Duration{time.Hour, time.Minute, time.Second, time.Millisecond}

	var total time.Duration
	for i, unit := range units {
		n, _ := strconv.Atoi(parts[i])
		total += unit * time.Duration(n)
	}
	return total
}
// printDuration renders duration as an SRT timestamp, "HH:MM:SS,mmm".
// Anything below one millisecond is truncated; the hour field grows beyond
// two digits when needed.
func printDuration(duration time.Duration) string {
	totalMs := int64(duration / time.Millisecond)

	const (
		msPerSecond = 1000
		msPerMinute = 60 * msPerSecond
		msPerHour   = 60 * msPerMinute
	)
	hours := totalMs / msPerHour
	minutes := totalMs / msPerMinute % 60
	seconds := totalMs / msPerSecond % 60
	millis := totalMs % msPerSecond

	return fmt.Sprintf(`%02d:%02d:%02d,%03d`, hours, minutes, seconds, millis)
}
| func readOneSubtitle(scanner *bufio.Scanner) (*Subtitle, error) { | |
| // read idx | |
| if !scanner.Scan() { | |
| return nil, nil | |
| } | |
| idxRaw := scanner.Text() | |
| idx, err := strconv.Atoi(idxRaw) | |
| if err != nil { | |
| return nil, errors.New("invalid subtitle index") | |
| } | |
| // read timing | |
| if !scanner.Scan() { | |
| return nil, errors.New("could not find subtitle timing") | |
| } | |
| timing := timeFramePattern.FindStringSubmatch(scanner.Text()) | |
| if timing == nil { | |
| return nil, errors.New("invalid subtitle timing") | |
| } | |
| fromTime := getDuration(timing[1:5]) | |
| toTime := getDuration(timing[5:9]) | |
| // read content | |
| if !scanner.Scan() { | |
| return nil, errors.New("could not find subtitle text") | |
| } | |
| content := scanner.Text() | |
| for scanner.Scan() && scanner.Text() != "" { | |
| content += "\n" | |
| content += scanner.Text() | |
| } | |
| subtitle := &Subtitle{idx, fromTime, toTime, content} | |
| return subtitle, nil | |
| } | |
| func writeOneSubtitle(file io.Writer, subtitle *Subtitle, idx *int) error { | |
| _, err := fmt.Fprint(file, | |
| *idx, "\n", | |
| printDuration(subtitle.fromTime), " --> ", printDuration(subtitle.toTime), "\n", | |
| subtitle.text, "\n\n") | |
| *idx++ | |
| return err | |
| } | |
| func main() { | |
| if len(os.Args) < 2 { | |
| println("Provide a subtitle file to fix.\ne.g. subtitle-fixer mysubtitle.srt") | |
| return | |
| } | |
| filePath := os.Args[1] | |
| newFilePath := filePath + ".fixed" | |
| file, _ := os.Open(filePath) | |
| newFile, _ := os.Create(newFilePath) | |
| defer file.Close() | |
| defer newFile.Close() | |
| scanner := bufio.NewScanner(file) | |
| var newIdx = 1 | |
| var lastSubtitle *Subtitle = nil | |
| for { | |
| subtitle, err := readOneSubtitle(scanner) | |
| if lastSubtitle != nil { | |
| if subtitle != nil { | |
| subtitle.text = strings.Trim(subtitle.text, "\n ") | |
| if len(subtitle.text) == 0 { // skip over empty subtitles | |
| continue | |
| } | |
| // skip over super-short subtitles that basically contain what their previous subtitle contains, and just prolong previous subtitle | |
| if subtitle.toTime - subtitle.fromTime < time.Millisecond * 150 && | |
| strings.Contains(lastSubtitle.text, subtitle.text) { | |
| lastSubtitle.toTime = subtitle.toTime | |
| continue | |
| } | |
| // if first-line of current subtitle is repeating last-line of previous-subtitle remove it | |
| currentLines := strings.Split(subtitle.text, "\n") | |
| lastLines := strings.Split(lastSubtitle.text, "\n") | |
| if currentLines[0] == lastLines[len(lastLines)-1] { | |
| subtitle.text = strings.Join(currentLines[1:], "\n") | |
| } | |
| // if first-line of current subtitle is repeating last-line of previous-subtitle remove it | |
| if subtitle.fromTime < lastSubtitle.toTime { | |
| lastSubtitle.toTime = subtitle.fromTime - time.Millisecond | |
| } | |
| } | |
| writeOneSubtitle(newFile, lastSubtitle, &newIdx) | |
| } | |
| if subtitle == nil { | |
| break | |
| } | |
| if err != nil { | |
| panic(err) | |
| } | |
| lastSubtitle = subtitle | |
| } | |
| os.Rename(filePath, filePath + ".bak") | |
| os.Rename(newFilePath, filePath) | |
| } |
@jfeelio
Download and compile subtitle-overlap-fixer:
wget https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f/raw/a3cbf48edd6ad0377b158e1455a702895e17f2dd/subtitle-overlap-fixer.go
go build subtitle-overlap-fixer.go
Download an auto-generated subtitle from YouTube and convert it to SRT:
youtube-dl --write-auto-sub --skip-download 'https://www.youtube.com/watch?v={VIDEO-ID}'
ffmpeg -i '{DOWNLOADED-VTT-FILE}' '{SRT-FILE}.srt'
Fix the overlapping parts of the subtitle in your converted '{SRT-FILE}.srt':
./subtitle-overlap-fixer '{SRT-FILE}.srt'
Hello,
When I use "ffmpeg -fix_sub_duration -i download.srt new.srt", it fixes the overlapping but removes the last line of the subtitle. How can I avoid this?
Thanks.
Actually, I am having the same problem: the output file ends up being 0 KB. Please help.
I found the same problem; it happens when the file has a BOM (byte order mark).
I removed the BOM, but now the result is only a file with the .fixed extension; no .bak file is created, and the content is a single paragraph with a start and end time and all the text of the file.
Greetings.
Thanks for sharing! Already had an .srt from youtube, so I just ran
sudo apt install golang-go
go build subtitle-overlap-fixer.go
./subtitle-overlap-fixer subtitles.srt
Output file worked great.
Hi Nima, this solved a problem for me so well. Thank you so much.
I use this tool as part of a little YouTube + Mac terminal routine to create and burn in captions for accessibility purposes, using youtube-dlc and ffmpeg as well. It's pretty neat.
I'm thinking about creating a bash script to do it all semi-automatically and maybe write a blog post about it so that others can use this routine to produce captioned videos quickly. If I do end up writing that script + blog post, may I include a link to your gist with credit (and praise!) in my blog post?
Thanks again!
@niceindividual That would be my pleasure. 🌹
For anyone else who would find this useful:
I had an issue with my SRT file being rejected by a picky program for having occasional blank entries like
42
00:03:14,000 --> 00:03:14,159
I moved the section at https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L111-L113 down to the end of the block to perform this check last. My thinking is that I was running into issues at line https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L124 that subverted the previous empty line check. In any case, pushing this section down seemed to resolve my issues!
Cheers to @nimatrueway for this awesome script. It saved me a ton of time!
Great job! It solved the problem I had with the overlapping. Thanks a lot.
AWESOME! Did exactly what I needed. There were some additional fixes I needed to do (sentence capitalization and changing lowercase "i" to uppercase "I" where needed.). I wrote a little OS X bash script for these issues if anyone want to try it. https://github.com/bruno-sardine/mac#Further-correct-YouTube-captions-captfixsh
For anyone else who would find this useful:
I had an issue with my SRT file being rejected by a picky program for having occasional blank entries like
42
00:03:14,000 --> 00:03:14,159
I moved the section at https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L111-L113 down to the end of the block to perform this check last. My thinking is that I was running into issues at line https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L124 that subverted the previous empty line check. In any case, pushing this section down seemed to resolve my issues!
Cheers to @nimatrueway for this awesome script. It saved me a ton of time!
Thanks fork it
Thank you for sharing this, @nimatrueway.
Sir, I compiled your original code and tested it by fixing a subtitle file, say abc.srt. The output ends up being named abc.srt.fixed, which is a bit inconvenient since the operating system doesn't recognize it as a standard subtitle file due to the altered extension. So I modified the code to generate the output as abc.fixed.srt instead—this way, the .srt extension remains intact and the file is still recognized properly.
Uh oh!
There was an error while loading. Please reload this page.