b-adams · April 13, 2024 15:33 · Sep 2, 2015 · Sep 2, 2015 · Sep 2, 2015
diff --git a/Converted_output.csv b/Converted_output.csv
@@ -1,4 +1,5 @@
 Entries,Timecodes,Subtitles
 1,"00:00:00,104 --> 00:00:02,669","Hi, I'm shell-scripting."
-2,"00:00:02,982 --> 00:00:04,965","I'm not sure if it would work,but I'll try it!"
+2,"00:00:02,982 --> 00:00:04,965","I'm not sure if it would work,"
+,,but I'll try it!
 3,"00:00:05,085 --> 00:00:07,321",There must be a way to do it!
diff --git a/console_output.log b/console_output.log
@@ -1,2 +1,2 @@
-Done converting Wildlife.srt to Subtitle.xlsx. 3 subtitle entries found
-[Finished in 2.0s]
+Done converting Wildlife.srt to Subtitle.xlsx. 3 subtitle entries found. 5 rows written
+[Finished in 0.6s]
diff --git a/convert.py b/convert.py
@@ -7,7 +7,7 @@ def parse_subtitles(lines):
     line_timestamp = re.compile('^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$')
     line_seperator = re.compile('^\s*$')
 
-    current_record = {'index':None, 'timestamp':None, 'subtitle':''}
+    current_record = {'index':None, 'timestamp':None, 'subtitles':[]}
     state = 'seeking to next entry'
 
     for line in lines:
@@ -33,36 +33,52 @@ def parse_subtitles(lines):
                 logging.info('Blank line reached, yielding record: {r}'.format(r=current_record))
                 yield current_record
                 state = 'seeking to next entry'
-                current_record = {'index':None, 'timestamp':None, 'subtitle':''}
+                current_record = {'index':None, 'timestamp':None, 'subtitles':[]}
             else:
                 logging.debug('Appending to subtitle: {s}'.format(s=line))
-                current_record['subtitle'] += line
+                current_record['subtitles'].append(line)
 
         else:
             logging.error('HUH: Fell into an unknown state: `{s}`'.format(s=state))
     if state == 'reading subtitles':
         # We must have finished the file without encountering a blank line. Dump the last record
         yield current_record
 
-def write_dict_to_worksheet(colnums_for_keys, keyed_data, worksheet, row):
-    for (colname, colindex) in colnums_for_keys.items():
-        worksheet.write(row, colindex, keyed_data[colname])
+def write_dict_to_worksheet(columns_for_keys, keyed_data, worksheet, row):
+    """
+    Write a subtitle-record to a worksheet. 
+    Return the row number after those that were written (since this may write multiple rows)
+    """
+    current_row = row
+    #First, horizontally write the entry and timecode
+    for (colname, colindex) in columns_for_keys.items():
+        if colname != 'subtitles': 
+            worksheet.write(current_row, colindex, keyed_data[colname])
+
+    #Next, vertically write the subtitle data
+    subtitle_column = columns_for_keys['subtitles']
+    for morelines in keyed_data['subtitles']:
+        worksheet.write(current_row, subtitle_column, morelines)
+        current_row+=1
+
+    return current_row
 
 def convert(input_filename, output_filename):
     workbook = xlsxwriter.Workbook(output_filename)
-    worksheet = workbook.add_worksheet('subtitle')
-    columns = {'index':0, 'timestamp':1, 'subtitle':2}
+    worksheet = workbook.add_worksheet('subtitles')
+    columns = {'index':0, 'timestamp':1, 'subtitles':2}
 
-    row = 0
-    headings = {'index':"Entries", 'timestamp':"Timecodes", 'subtitle':"Subtitles"}
-    write_dict_to_worksheet(columns, headings, worksheet, row)
+    next_available_row = 0
+    records_processed = 0
+    headings = {'index':"Entries", 'timestamp':"Timecodes", 'subtitles':["Subtitles"]}
+    next_available_row=write_dict_to_worksheet(columns, headings, worksheet, next_available_row)
 
     with open(input_filename) as textfile:
         for record in parse_subtitles(textfile):
-            row += 1
-            write_dict_to_worksheet(columns, record, worksheet, row)
+            next_available_row = write_dict_to_worksheet(columns, record, worksheet, next_available_row)
+            records_processed += 1
 
-    print('Done converting {inp} to {outp}. {n} subtitle entries found'.format(inp=input_filename, outp=output_filename, n=row))
+    print('Done converting {inp} to {outp}. {n} subtitle entries found. {m} rows written'.format(inp=input_filename, outp=output_filename, n=records_processed, m=next_available_row))
     workbook.close()
 
 convert(input_filename='Wildlife.srt', output_filename='Subtitle.xlsx')
diff --git a/Converted_output.csv b/Converted_output.csv
@@ -0,0 +1,4 @@
+Entries,Timecodes,Subtitles
+1,"00:00:00,104 --> 00:00:02,669","Hi, I'm shell-scripting."
+2,"00:00:02,982 --> 00:00:04,965","I'm not sure if it would work,but I'll try it!"
+3,"00:00:05,085 --> 00:00:07,321",There must be a way to do it!
diff --git a/Wildlife.srt b/Wildlife.srt
@@ -0,0 +1,12 @@
+1
+00:00:00,104 --> 00:00:02,669
+Hi, I'm shell-scripting.
+
+2
+00:00:02,982 --> 00:00:04,965
+I'm not sure if it would work,
+but I'll try it!
+
+3
+00:00:05,085 --> 00:00:07,321
+There must be a way to do it!
diff --git a/console_output.log b/console_output.log
@@ -0,0 +1,2 @@
+Done converting Wildlife.srt to Subtitle.xlsx. 3 subtitle entries found
+[Finished in 2.0s]
diff --git a/convert.py b/convert.py
@@ -0,0 +1,68 @@
+import xlsxwriter
+import re
+import logging
+
+def parse_subtitles(lines):
+    line_index = re.compile('^\d*$')
+    line_timestamp = re.compile('^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$')
+    line_seperator = re.compile('^\s*$')
+
+    current_record = {'index':None, 'timestamp':None, 'subtitle':''}
+    state = 'seeking to next entry'
+
+    for line in lines:
+        line = line.strip('\n')
+        if state == 'seeking to next entry':
+            if line_index.match(line):
+                logging.debug('Found index: {i}'.format(i=line))
+                current_record['index'] = line
+                state = 'looking for timestamp'
+            else:
+                logging.error('HUH: Expected to find an index, but instead found: [{d}]'.format(d=line))
+
+        elif state == 'looking for timestamp':
+            if line_timestamp.match(line):
+                logging.debug('Found timestamp: {t}'.format(t=line))
+                current_record['timestamp'] = line
+                state = 'reading subtitles'
+            else:
+                logging.error('HUH: Expected to find a timestamp, but instead found: [{d}]'.format(d=line))
+
+        elif state == 'reading subtitles':
+            if line_seperator.match(line):
+                logging.info('Blank line reached, yielding record: {r}'.format(r=current_record))
+                yield current_record
+                state = 'seeking to next entry'
+                current_record = {'index':None, 'timestamp':None, 'subtitle':''}
+            else:
+                logging.debug('Appending to subtitle: {s}'.format(s=line))
+                current_record['subtitle'] += line
+
+        else:
+            logging.error('HUH: Fell into an unknown state: `{s}`'.format(s=state))
+    if state == 'reading subtitles':
+        # We must have finished the file without encountering a blank line. Dump the last record
+        yield current_record
+
+def write_dict_to_worksheet(colnums_for_keys, keyed_data, worksheet, row):
+    for (colname, colindex) in colnums_for_keys.items():
+        worksheet.write(row, colindex, keyed_data[colname])
+
+def convert(input_filename, output_filename):
+    workbook = xlsxwriter.Workbook(output_filename)
+    worksheet = workbook.add_worksheet('subtitle')
+    columns = {'index':0, 'timestamp':1, 'subtitle':2}
+
+    row = 0
+    headings = {'index':"Entries", 'timestamp':"Timecodes", 'subtitle':"Subtitles"}
+    write_dict_to_worksheet(columns, headings, worksheet, row)
+
+    with open(input_filename) as textfile:
+        for record in parse_subtitles(textfile):
+            row += 1
+            write_dict_to_worksheet(columns, record, worksheet, row)
+
+    print('Done converting {inp} to {outp}. {n} subtitle entries found'.format(inp=input_filename, outp=output_filename, n=row))
+    workbook.close()
+
+convert(input_filename='Wildlife.srt', output_filename='Subtitle.xlsx')
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Done converting Wildlife.srt to Subtitle.xlsx. 3 subtitle entries found
		[Finished in 2.0s]