kadamwhite · May 13, 2025 18:50 · May 13, 2025 · May 13, 2025
diff --git a/loom-transcript-to-srt.js b/loom-transcript-to-srt.js
@@ -1,15 +1,26 @@
 #!/usr/bin/env node
 
+/**
+ * This script is designed to receive piped content from a text file containing
+ * a loom.com video transcript, which is usually copyable on the free plan as
+ * a series of lines in the format
+ *     0:02 What I said then, etc etc etc...
+ *
+ * The output of the script is SRT-format subtitle text. Redirect it into a
+ * .srt file for use in VLC or other subtitle-aware video players, or to
+ * combine later with a video source using ffmpeg.
+ */
+
 const readline = require( 'readline' );
 
 // This is a nodejs file.
 // Constants
 const CHARS_PER_SECOND = 15; // Adjust as needed for reading speed
-const MAX_CHARS_PER_LINE_SRT = 40;
+const MAX_CHARS_PER_LINE_SRT = 35;
 const MAX_LINES_PER_SRT = 2;
 const MAX_CHARS_PER_SRT_BLOCK = MAX_CHARS_PER_LINE_SRT * MAX_LINES_PER_SRT;
-const MIN_IDEAL_DURATION_SRT = 1.0; // Minimum ideal duration for a subtitle segment
-const MAX_IDEAL_DURATION_SRT = 7.0; // Maximum ideal duration for a subtitle segment
+const MIN_IDEAL_DURATION_SRT = 0.8; // Minimum ideal duration for a subtitle segment
+const MAX_IDEAL_DURATION_SRT = 5.0; // Maximum ideal duration for a subtitle segment
 
 // Helper function to parse "M:SS" or "MM:SS" into seconds
 function parseTime( timeStr ) {

diff --git a/loom-transcript-to-srt.js b/loom-transcript-to-srt.js
@@ -0,0 +1,228 @@
+#!/usr/bin/env node
+
+const readline = require( 'readline' );
+
+// This is a nodejs file.
+// Constants
+
+// Assumed reading speed. Text length divided by this value gives an estimated
+// on-screen duration in seconds. Adjust as needed for reading speed.
+const CHARS_PER_SECOND = 15;
+// Wrap width handed to formatTextForSrt for each subtitle line.
+const MAX_CHARS_PER_LINE_SRT = 40;
+// Line cap applied by formatTextForSrt to a single subtitle.
+const MAX_LINES_PER_SRT = 2;
+// Character budget handed to splitTextIntoChunks for one subtitle.
+const MAX_CHARS_PER_SRT_BLOCK = MAX_CHARS_PER_LINE_SRT * MAX_LINES_PER_SRT;
+// Lower and upper clamp (seconds) for the ideal duration of a subtitle
+// segment. The minimum is also the floor for estimated segment durations.
+const MIN_IDEAL_DURATION_SRT = 1.0; // Minimum ideal duration for a subtitle segment
+const MAX_IDEAL_DURATION_SRT = 7.0; // Maximum ideal duration for a subtitle segment
+
+/**
+ * Parse a transcript timestamp into a whole number of seconds.
+ *
+ * Accepts "M:SS" / "MM:SS" and also "H:MM:SS": every colon-separated field is
+ * read as a base-10 integer and folded in as the next base-60 digit, so an
+ * hours field is no longer mistaken for minutes. A string without any colon
+ * is treated as a plain number of seconds.
+ *
+ * @param {string} timeStr Timestamp such as "0:02", "12:34" or "1:02:03".
+ * @returns {number} Total seconds, or NaN when a field is not numeric.
+ */
+function parseTime( timeStr ) {
+	return timeStr
+		.split( ':' )
+		.reduce( ( total, field ) => total * 60 + parseInt( field, 10 ), 0 );
+}
+
+/**
+ * Render a time offset as an SRT timestamp of the form "HH:MM:SS,mmm".
+ *
+ * @param {number} totalSeconds Offset in seconds; may be fractional.
+ * @returns {string} Zero-padded timestamp with a comma before the milliseconds.
+ */
+function formatSrtTime( totalSeconds ) {
+	// Round once to integer milliseconds so every field is derived from the
+	// same whole number and no rounding can ripple between fields.
+	const ms = Math.round( totalSeconds * 1000 );
+	const pad = ( value, width ) => String( value ).padStart( width, '0' );
+
+	const hours = Math.floor( ms / 3600000 );
+	const minutes = Math.floor( ( ms % 3600000 ) / 60000 );
+	const seconds = Math.floor( ( ms % 60000 ) / 1000 );
+	const milliseconds = ms % 1000;
+
+	return `${ pad( hours, 2 ) }:${ pad( minutes, 2 ) }:${ pad( seconds, 2 ) },${ pad( milliseconds, 3 ) }`;
+}
+
+/**
+ * Greedily pack the words of `text` into chunks of at most `maxLength`
+ * characters, joining words with single spaces.
+ *
+ * A word that is longer than `maxLength` on its own is never broken; it
+ * becomes a chunk by itself.
+ *
+ * @param {string} text      Text to split; any run of whitespace separates words.
+ * @param {number} maxLength Character budget for one chunk.
+ * @returns {string[]} Chunks in reading order; empty when `text` has no words.
+ */
+function splitTextIntoChunks( text, maxLength ) {
+	const chunks = [];
+	let pending = '';
+
+	text
+		.split( /\s+/ )
+		.filter( ( token ) => token !== '' )
+		.forEach( ( token ) => {
+			if ( pending === '' ) {
+				pending = token;
+				return;
+			}
+			const candidate = `${ pending } ${ token }`;
+			if ( candidate.length <= maxLength ) {
+				pending = candidate;
+			} else {
+				// The next word would overflow: close this chunk and start a new one.
+				chunks.push( pending );
+				pending = token;
+			}
+		} );
+
+	if ( pending !== '' ) {
+		chunks.push( pending );
+	}
+	return chunks;
+}
+
+/**
+ * Word-wrap one chunk of subtitle text into at most `maxLines` lines.
+ *
+ * Words are packed greedily up to `maxCharsPerLine` characters per line.
+ * Once the last allowed line has been started, every remaining word is
+ * appended to it in reading order, so that line may exceed
+ * `maxCharsPerLine`, but no text is dropped or reordered.
+ *
+ * @param {string} textChunk       Text to wrap; runs of whitespace separate words.
+ * @param {number} maxCharsPerLine Preferred maximum line width in characters.
+ * @param {number} [maxLines]      Line cap; defaults to MAX_LINES_PER_SRT.
+ *                                 Zero or a negative value means no cap.
+ * @returns {string} Lines joined with "\n", or '' when the chunk has no words.
+ */
+function formatTextForSrt( textChunk, maxCharsPerLine, maxLines = MAX_LINES_PER_SRT ) {
+	const words = textChunk.split( /\s+/ ).filter( ( w ) => w.length > 0 );
+	if ( words.length === 0 ) return '';
+
+	const hasLineCap = maxLines > 0;
+	const lines = [];
+	let currentLine = '';
+
+	for ( const word of words ) {
+		if ( currentLine.length === 0 ) {
+			currentLine = word;
+			continue;
+		}
+
+		// While building the final permitted line, never wrap again. Overflow
+		// stays on this line so that words keep their original order.
+		const onLastAllowedLine = hasLineCap && lines.length >= maxLines - 1;
+		if ( onLastAllowedLine || currentLine.length + 1 + word.length <= maxCharsPerLine ) {
+			currentLine += ' ' + word;
+		} else {
+			lines.push( currentLine );
+			currentLine = word; // Start new line
+		}
+	}
+
+	// words is non-empty here, so currentLine always holds the final line.
+	lines.push( currentLine );
+	return lines.join( '\n' );
+}
+
+// Read the transcript line by line from stdin.
+const rl = readline.createInterface( {
+	input: process.stdin,
+	terminal: false, // Ensure it works with piped input
+} );
+
+// Transcript entries in input order, as { timeStr, text } pairs.
+const rawEntries = [];
+
+rl.on( 'line', ( line ) => {
+	// A usable line is a "M:SS" or "MM:SS" timestamp, whitespace, then the text.
+	const match = /^(\d{1,2}:\d{2})\s+(.*)$/.exec( line );
+	if ( ! match ) {
+		return;
+	}
+
+	const [ , timeStr, rawText ] = match;
+	const text = rawText.trim();
+	if ( text.length === 0 ) {
+		// Timestamp with nothing after it: nothing to subtitle.
+		return;
+	}
+
+	rawEntries.push( { timeStr, text } );
+} );
+
+// Once stdin is exhausted, turn the collected transcript entries into SRT
+// cues and write them to stdout. Each entry spans from its own timestamp to
+// the next entry's timestamp; text too long for one cue is split into chunks
+// that share that span in proportion to their estimated reading time.
+rl.on( 'close', () => {
+	const segments = rawEntries
+		.map( ( entry ) => ( {
+			timeSeconds: parseTime( entry.timeStr ),
+			text: entry.text,
+		} ) )
+		.sort( ( a, b ) => a.timeSeconds - b.timeSeconds ); // Sort by time
+
+	// Flat list of output lines: index, timing, text and a blank separator per cue.
+	const srtOutput = [];
+	let subtitleIndex = 1;
+
+	for ( let i = 0; i < segments.length; i++ ) {
+		const currentSegment = segments[ i ];
+		const startTime = currentSegment.timeSeconds;
+		const text = currentSegment.text;
+
+		// A segment normally ends where the next one begins.
+		let endTimeOverallSegment;
+		if ( i + 1 < segments.length ) {
+			endTimeOverallSegment = segments[ i + 1 ].timeSeconds;
+		} else {
+			// For the last segment, estimate duration based on text length
+			const estimatedDuration = Math.max(
+				MIN_IDEAL_DURATION_SRT,
+				text.length / CHARS_PER_SECOND
+			);
+			endTimeOverallSegment = startTime + estimatedDuration;
+		}
+
+		// Segments are sorted, so the next timestamp is never earlier; this
+		// triggers when two entries share the same timestamp. The segment then
+		// gets an estimated duration instead, which means its cues can overlap
+		// the following segment that starts at the same time.
+		if ( endTimeOverallSegment <= startTime ) {
+			const fallbackDuration = Math.max(
+				MIN_IDEAL_DURATION_SRT,
+				text.length / CHARS_PER_SECOND
+			);
+			endTimeOverallSegment = startTime + fallbackDuration;
+		}
+
+		const chunks = splitTextIntoChunks( text, MAX_CHARS_PER_SRT_BLOCK );
+		if ( chunks.length === 0 ) continue;
+
+		// Estimated reading time per chunk, clamped to the ideal range. These
+		// are only used as weights for dividing up the segment's time span.
+		const idealDurations = chunks.map( ( ch ) =>
+			Math.max(
+				MIN_IDEAL_DURATION_SRT,
+				Math.min( MAX_IDEAL_DURATION_SRT, ch.length / CHARS_PER_SECOND )
+			)
+		);
+		const sumIdealDurations = idealDurations.reduce( ( sum, d ) => sum + d, 0 );
+
+		const availableDurationForSegment = endTimeOverallSegment - startTime;
+		let currentChunkTime = startTime;
+
+		for ( let j = 0; j < chunks.length; j++ ) {
+			const chunkText = chunks[ j ];
+			let durationForThisChunk;
+
+			if ( sumIdealDurations === 0 ) {
+				// Defensive fallback: split the span evenly. Cannot occur while
+				// MIN_IDEAL_DURATION_SRT is positive, since every weight is at
+				// least that value.
+				durationForThisChunk = availableDurationForSegment / chunks.length;
+			} else {
+				// Share of the segment's span proportional to this chunk's weight.
+				durationForThisChunk =
+					( idealDurations[ j ] / sumIdealDurations ) * availableDurationForSegment;
+			}
+			durationForThisChunk = Math.max( 0, durationForThisChunk ); // Ensure non-negative
+
+			let chunkStartTime = currentChunkTime;
+			let chunkEndTime = chunkStartTime + durationForThisChunk;
+
+			// Ensure the last chunk of a segment aligns with the segment's overall end time
+			// (this also absorbs any floating-point drift from the running sum).
+			if ( j === chunks.length - 1 ) {
+				chunkEndTime = endTimeOverallSegment;
+			}
+
+			// Ensure start < end for a valid subtitle. If not, try to give a minimal duration or skip.
+			if ( chunkEndTime <= chunkStartTime ) {
+				if ( chunkStartTime < endTimeOverallSegment ) {
+					// If there's any room at all
+					chunkEndTime = Math.min( chunkStartTime + 0.001, endTimeOverallSegment ); // Minimal 1ms duration
+				} else {
+					// No room for this chunk, skip it.
+					currentChunkTime = chunkEndTime; // Advance time cursor
+					continue;
+				}
+			}
+
+			const formattedText = formatTextForSrt( chunkText, MAX_CHARS_PER_LINE_SRT );
+			if ( formattedText.length === 0 ) {
+				// Skip if formatting results in no text
+				currentChunkTime = chunkEndTime; // Advance time cursor
+				continue;
+			}
+
+			// One SRT cue: sequence number, "start --> end" line, text, blank line.
+			srtOutput.push( String( subtitleIndex++ ) );
+			srtOutput.push(
+				`${ formatSrtTime( chunkStartTime ) } --> ${ formatSrtTime( chunkEndTime ) }`
+			);
+			srtOutput.push( formattedText );
+			srtOutput.push( '' ); // Blank line separator
+
+			currentChunkTime = chunkEndTime; // Next chunk starts where this one ended
+		}
+	}
+
+	// Nothing is written when no transcript lines were recognised.
+	if ( srtOutput.length > 0 ) {
+		process.stdout.write( srtOutput.join( '\n' ) );
+	}
+} );
No results found