Created
April 17, 2019 17:17
-
-
Save kmcheung12/fea8105a0aac41e9d6f98f1df8634ca6 to your computer and use it in GitHub Desktop.
Convert a YouTube timedtext subtitle file into SRT format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import xml.etree.ElementTree as ET | |
# printf-style layout of one SRT timestamp: hours:minutes:seconds,milliseconds.
# It is filled with the 4-tuple returned by to_time().
fmt = '%02d:%02d:%02d,%03d'
def to_time(m_sec):
    """Split a millisecond count into clock components.

    Args:
        m_sec: Elapsed time in milliseconds.

    Returns:
        A tuple ``(hours, minutes, seconds, milliseconds)``. Minutes and
        seconds wrap at 60 and milliseconds at 1000; hours are not wrapped.
    """
    # Peel off one unit at a time, carrying the quotient to the next unit.
    whole_seconds, millis = divmod(m_sec, 1000)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return (hours, minutes, seconds, millis)
def fmt_time(t, d):
    """Build the ``start --> end`` timing line of a subtitle cue.

    Args:
        t: Cue start offset in milliseconds; anything ``int()`` accepts.
        d: Cue duration in milliseconds; anything ``int()`` accepts.

    Returns:
        A string with the start and end timestamps, each rendered with the
        module-level ``fmt`` layout, joined by ``' --> '``.
    """
    begin_ms = int(t)
    finish_ms = begin_ms + int(d)
    # Render both ends of the cue with the same timestamp layout.
    stamps = [fmt % to_time(ms) for ms in (begin_ms, finish_ms)]
    return ' --> '.join(stamps)
def fmt_line(n, d, s):
    """Assemble one complete subtitle entry.

    Args:
        n: Cue counter, rendered as a decimal integer.
        d: Timing line for the cue.
        s: Caption text for the cue.

    Returns:
        The counter, timing line and text on separate lines, followed by
        one blank line.
    """
    # The two trailing empty pieces produce the closing "\n\n".
    pieces = ('%d' % (n,), str(d), str(s), '', '')
    return '\n'.join(pieces)
def iter_cues(root):
    """Yield the usable subtitle cues found under *root*.

    Every ``<p>`` element below *root* is inspected. Its caption text is the
    concatenation of its own text and the text of all nested elements, with
    surrounding whitespace removed, so captions whose words live in child
    elements are not lost.

    Elements without a ``t`` (start) attribute, without a ``d`` (duration)
    attribute, or without any non-whitespace text are skipped, because they
    cannot be written as a valid SRT entry.

    Args:
        root: An ``xml.etree.ElementTree.Element`` to search.

    Yields:
        Tuples ``(index, t, d, text)`` where ``index`` counts emitted cues
        starting at 1 and ``t`` / ``d`` are the raw attribute strings.
    """
    index = 0
    for para in root.iter('p'):
        start = para.get('t')
        duration = para.get('d')
        text = ''.join(para.itertext()).strip()
        if start is None or duration is None or not text:
            continue
        # Count only emitted cues so the SRT numbering stays gap-free.
        index += 1
        yield index, start, duration, text


if __name__ == '__main__':
    # run with python timedtext2srt.py {timedtext} {srt}
    import sys

    if len(sys.argv) < 3:
        sys.exit(f'usage: python {sys.argv[0]} {{timedtext}} {{srt}}')
    src_path = sys.argv[1]
    dst_path = sys.argv[2]
    # Parse before opening the output so a bad input never truncates it.
    root = ET.parse(src_path).getroot()
    # Explicit UTF-8: subtitle text is frequently non-ASCII and the default
    # encoding of open() depends on the platform locale.
    with open(dst_path, 'w', encoding='utf-8') as f:
        for n, t, d, text in iter_cues(root):
            f.write(fmt_line(n, fmt_time(t, d), text))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment