Skip to content

Instantly share code, notes, and snippets.

@kmcheung12
Created April 17, 2019 17:17
Show Gist options
  • Save kmcheung12/fea8105a0aac41e9d6f98f1df8634ca6 to your computer and use it in GitHub Desktop.
Save kmcheung12/fea8105a0aac41e9d6f98f1df8634ca6 to your computer and use it in GitHub Desktop.
Convert a YouTube timedtext subtitle file into an SRT file
import xml.etree.ElementTree as ET
# %-format template for one timestamp: three zero-padded 2-digit fields
# separated by ':' followed by ',' and a zero-padded 3-digit field.
# It is filled with the 4-tuple returned by to_time().
fmt = '%02d:%02d:%02d,%03d'
def to_time(m_sec):
    """Split a duration in milliseconds into clock components.

    Args:
        m_sec: Integer number of milliseconds.

    Returns:
        A tuple ``(hours, minutes, seconds, milliseconds)``. Minutes and
        seconds are in ``0..59`` and milliseconds in ``0..999``; hours are
        not capped.
    """
    # Peel off one unit at a time; each divmod carries the quotient upward.
    total_seconds, millis = divmod(m_sec, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return (hours, minutes, seconds, millis)
def fmt_time(t, d):
    """Build the ``start --> end`` timing line of a subtitle cue.

    Args:
        t: Cue start offset in milliseconds (anything ``int()`` accepts,
            e.g. the raw attribute string).
        d: Cue duration in milliseconds (anything ``int()`` accepts).

    Returns:
        A string of the form ``'HH:MM:SS,mmm --> HH:MM:SS,mmm'``.

    Raises:
        ValueError: If ``t`` or ``d`` cannot be converted by ``int()``.
    """
    start = int(t)
    # The end time is not given directly; it is start plus duration.
    end = start + int(d)
    return '%s --> %s' % (fmt % to_time(start), fmt % to_time(end))
def fmt_line(n, d, s):
    """Render one subtitle cue as text.

    Args:
        n: Cue sequence number.
        d: Timing line for the cue (e.g. the result of ``fmt_time``).
        s: Cue text. ``None`` is treated as empty text.

    Returns:
        The cue as ``'<n>\\n<d>\\n<s>\\n\\n'``; the trailing blank line
        separates it from the next cue.
    """
    # ElementTree reports a missing text node as None; without this guard the
    # literal word "None" would be written into the subtitle.
    if s is None:
        s = ''
    return '%d\n%s\n%s\n\n' % (n, d, s)
if __name__ == '__main__':
    # run with python timedtext2srt.py {timedtext} {srt}
    import sys

    if len(sys.argv) != 3:
        sys.exit('usage: python timedtext2srt.py {timedtext} {srt}')
    # Avoid the names `input`/`output` so the builtin input() is not shadowed.
    src_path, dst_path = sys.argv[1], sys.argv[2]

    root = ET.parse(src_path).getroot()
    # Write UTF-8 explicitly: subtitles are frequently non-ASCII and the
    # platform default encoding may be unable to represent them.
    with open(dst_path, 'w', encoding='utf-8') as f:
        n = 0
        for line in root.iter('p'):
            attr = line.attrib
            t = attr.get('t')
            d = attr.get('d')
            if t is None or d is None:
                # A cue cannot be placed on the timeline without both values.
                continue
            # itertext() also collects text held in nested child elements,
            # where line.text alone would be None or incomplete.
            text = ''.join(line.itertext()).strip()
            if not text:
                # An empty cue body would add nothing but a stray blank block.
                continue
            # Number only the cues actually written, starting from 1.
            n += 1
            f.write(fmt_line(n, fmt_time(t, d), text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment