Skip to content

Instantly share code, notes, and snippets.

@PathToLife
Created March 9, 2021 14:05
Show Gist options
  • Select an option

  • Save PathToLife/14582c2a06aca13da7be1490af014f81 to your computer and use it in GitHub Desktop.

Select an option

Save PathToLife/14582c2a06aca13da7be1490af014f81 to your computer and use it in GitHub Desktop.

Revisions

  1. PathToLife created this gist Mar 9, 2021.
    101 changes: 101 additions & 0 deletions cleansrt.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,101 @@
    import os
    from typing import List
    import shutil

    # Snapshot the names of all entries in the current working directory.
    dirs = os.listdir(os.getcwd())
    # Keep only the entries whose name ends in ".srt" (case-sensitive match).
    srtpaths = [name for name in dirs if name.endswith('.srt')]

    class SrtLine:
        """A single subtitle entry: its number, its time string and its text."""

        # Sequence number of the entry.
        num: int
        # Time line of the entry, stored verbatim as a string.
        timeStr: str
        # Subtitle text of the entry.
        text: str

        def __init__(self, num: int, timeStr: str, text: str):
            """Store the three fields unchanged on the instance."""
            self.num, self.timeStr, self.text = num, timeStr, text

        def __str__(self):
            """Return the three fields on one line, separated by single spaces."""
            return "{} {} {}".format(self.num, self.timeStr, self.text)

    class Buffer:
        """Forward-only reader over a list of text lines.

        Every line is returned with surrounding whitespace stripped, and the
        read position only ever moves forward.
        """

        # Lines to serve, in order. (Was annotated as `[]`, which is a list
        # instance rather than a type.)
        buff: List[str]
        # Index of the next line readLine() will return.
        i: int

        def __init__(self, buff: List[str]):
            """Start reading *buff* from its first element."""
            self.i = 0
            self.buff = buff

        def readNonEmptyLine(self) -> str:
            """Return the next line that is non-empty after stripping.

            Lines that are empty or whitespace-only are consumed and skipped.

            Raises:
                EOFError: if the lines run out before a non-empty one is found.
            """
            dat = self.readLine()
            while not dat:
                dat = self.readLine()
            return dat

        def readLine(self) -> str:
            """Return the next line, stripped, and advance the read position.

            Raises:
                EOFError: if every line has already been consumed.
            """
            if self.reachedEnd():
                raise EOFError()
            dat = self.buff[self.i]
            self.i += 1
            return dat.strip()

        def reachedEnd(self) -> bool:
            """Return True once every line has been consumed."""
            return self.i >= len(self.buff)

    def cleanSrt(file):
        """Parse a loosely formatted .srt file into a list of SrtLine entries.

        Blank lines are ignored wherever they occur. Each entry is read as:
        a numeric line, optionally one more numeric line (which then replaces
        the first as the entry number), a time line, and exactly one text line.
        The time line is only checked for not being purely numeric. Every
        parsed entry is also printed to stdout.

        Limitations that follow from reading a single text line per entry:
        a second text line is taken as the next entry's number and therefore
        raises ValueError unless it happens to be numeric.

        Args:
            file: path of the .srt file, opened in text mode with the default
                encoding.

        Returns:
            The parsed entries, in file order.

        Raises:
            ValueError: if a number or a time line is not where it is expected.
            RuntimeError: if no complete entry could be parsed.
        """
        # The context manager closes the handle even if reading raises.
        with open(file, 'r') as f:
            lines = f.readlines()

        # A UTF-8 byte-order mark that survives decoding shows up as U+FEFF in
        # front of the first entry number; strip() does not remove it and
        # isnumeric() rejects it, so drop it before parsing.
        if lines and lines[0].startswith('\ufeff'):
            lines[0] = lines[0][1:]

        strLines: List[SrtLine] = []

        buff = Buffer(lines)

        try:
            while not buff.reachedEnd():

                number: str = buff.readNonEmptyLine()
                if not number.isnumeric():
                    raise ValueError('srt corrupt, expected number')

                timeRange = buff.readNonEmptyLine()
                if timeRange.isnumeric():  # extra number
                    number = timeRange
                    timeRange = buff.readNonEmptyLine()

                # readNonEmptyLine never returns an empty string, so only the
                # "still numeric" case needs checking here.
                if timeRange.isnumeric():
                    raise ValueError('srt corrupt, expected time range')

                # Converted before the text is read, and kept in its own name
                # so `number` stays a str.
                cueNumber = int(number)

                text = buff.readNonEmptyLine()

                srtLine = SrtLine(cueNumber, timeRange, text)
                strLines.append(srtLine)

                print(srtLine)
        except EOFError:
            # Input exhausted. This is the normal exit when only blank lines
            # remain; an entry cut short by end of file is dropped silently.
            pass

        if not strLines:
            raise RuntimeError('srtLines empty after cleaning')

        return strLines


    def writeSrt(fp, srtLines: List[SrtLine]):
        """Write *srtLines* to the file *fp*, replacing its contents.

        Each entry is written as three lines (number, time string, text)
        followed by one blank line.
        """
        with open(fp, 'w') as out:
            for cue in srtLines:
                numberLine = str(cue.num) + "\n"
                timeLine = cue.timeStr + "\n"
                # The text line carries the blank separator line with it.
                textBlock = cue.text + "\n\n"
                out.writelines((numberLine, timeLine, textBlock))

    # Rewrite every discovered .srt file in place.
    for fp in srtpaths:
        # Parse first: if cleanSrt raises, the script stops here, before a
        # backup is made or the file is overwritten.
        srtLines = cleanSrt(fp)
        # Keep a copy of the original content next to it as "<name>.bak".
        shutil.copy(fp, f"{fp}.bak")
        writeSrt(fp, srtLines)