Skip to content

Instantly share code, notes, and snippets.

@kadamwhite
Created May 13, 2025 18:50
Show Gist options
  • Select an option

  • Save kadamwhite/ced5cb41d129a4192aefe774389e9e6c to your computer and use it in GitHub Desktop.

Select an option

Save kadamwhite/ced5cb41d129a4192aefe774389e9e6c to your computer and use it in GitHub Desktop.

Revisions

  1. kadamwhite revised this gist May 13, 2025. 1 changed file with 14 additions and 3 deletions.
    17 changes: 14 additions & 3 deletions loom-transcript-to-srt.js
    Original file line number Diff line number Diff line change
    @@ -1,15 +1,26 @@
    #!/usr/bin/env node

    /**
    * This script is designed to receive piped content from a text file containing
    * a loom.com video transcript, which is usually copyable on the free plan as
    * a series of lines in the format
    * 0:02 What I said then, etc etc etc...
    *
    * The output of the script is SRT-format subtitle text. It can be redirected
    * into a .srt file for use in VLC or other subtitle-aware video players, or
    * combined later with a video source using ffmpeg.
    */

    const readline = require( 'readline' );

    // This is a nodejs file.
    // Constants
    const CHARS_PER_SECOND = 15; // Adjust as needed for reading speed
    const MAX_CHARS_PER_LINE_SRT = 40;
    const MAX_CHARS_PER_LINE_SRT = 35;
    const MAX_LINES_PER_SRT = 2;
    const MAX_CHARS_PER_SRT_BLOCK = MAX_CHARS_PER_LINE_SRT * MAX_LINES_PER_SRT;
    const MIN_IDEAL_DURATION_SRT = 1.0; // Minimum ideal duration for a subtitle segment
    const MAX_IDEAL_DURATION_SRT = 7.0; // Maximum ideal duration for a subtitle segment
    const MIN_IDEAL_DURATION_SRT = 0.8; // Minimum ideal duration for a subtitle segment
    const MAX_IDEAL_DURATION_SRT = 5.0; // Maximum ideal duration for a subtitle segment

    // Helper function to parse "M:SS" or "MM:SS" into seconds
    function parseTime( timeStr ) {
  2. kadamwhite created this gist May 13, 2025.
    228 changes: 228 additions & 0 deletions loom-transcript-to-srt.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,228 @@
    #!/usr/bin/env node

    const readline = require( 'readline' );

    // Node.js script: tunable settings for the transcript-to-SRT conversion.

    /** Assumed reading speed; used to estimate how long text should stay on screen. */
    const CHARS_PER_SECOND = 15;

    /** Layout limits for one subtitle: lines per subtitle and characters per line. */
    const MAX_LINES_PER_SRT = 2;
    const MAX_CHARS_PER_LINE_SRT = 40;

    /** Character budget for one whole subtitle (every permitted line filled). */
    const MAX_CHARS_PER_SRT_BLOCK = MAX_LINES_PER_SRT * MAX_CHARS_PER_LINE_SRT;

    /** Bounds, in seconds, applied to the ideal on-screen duration of one subtitle. */
    const MIN_IDEAL_DURATION_SRT = 1;
    const MAX_IDEAL_DURATION_SRT = 7;

    /**
     * Convert a colon-separated timestamp into a total number of seconds.
     *
     * Handles "M:SS" and "MM:SS" as well as "H:MM:SS": every field is worth
     * sixty times the field to its right, so the fields are folded left to
     * right in base 60. Previously only the first two fields were read, which
     * turned "1:02:03" into 62 seconds.
     *
     * @param {string} timeStr Timestamp such as "0:02", "12:34" or "1:02:03".
     * @returns {number} Total seconds, or NaN if a field is not numeric.
     */
    function parseTime( timeStr ) {
        return timeStr
            .split( ':' )
            .reduce( ( total, field ) => total * 60 + parseInt( field, 10 ), 0 );
    }

    /**
     * Render an offset in seconds as an SRT timestamp of the form "HH:MM:SS,mmm".
     *
     * The value is rounded to whole milliseconds once, up front, and every
     * field is then derived with integer arithmetic so that rounding cannot
     * produce an out-of-range field such as ",1000".
     *
     * @param {number} totalSeconds Offset in seconds; may be fractional.
     * @returns {string} Zero-padded timestamp; hours grow past two digits if needed.
     */
    function formatSrtTime( totalSeconds ) {
        const MS_PER_SECOND = 1000;
        const MS_PER_MINUTE = 60000;
        const MS_PER_HOUR = 3600000;
        const pad = ( value, width ) => String( value ).padStart( width, '0' );

        const wholeMs = Math.round( totalSeconds * MS_PER_SECOND );

        const hh = Math.floor( wholeMs / MS_PER_HOUR );
        const pastTheHour = wholeMs % MS_PER_HOUR;
        const mm = Math.floor( pastTheHour / MS_PER_MINUTE );
        const pastTheMinute = pastTheHour % MS_PER_MINUTE;
        const ss = Math.floor( pastTheMinute / MS_PER_SECOND );
        const mmm = pastTheMinute % MS_PER_SECOND;

        const clock = [ pad( hh, 2 ), pad( mm, 2 ), pad( ss, 2 ) ].join( ':' );
        return `${ clock },${ pad( mmm, 3 ) }`;
    }

    /**
     * Greedily pack the words of `text` into chunks of at most `maxLength`
     * characters, with words inside a chunk joined by single spaces.
     *
     * Words are never broken, so a single word longer than `maxLength` becomes
     * a chunk of its own that exceeds the limit.
     *
     * @param {string} text Source text; any run of whitespace separates words.
     * @param {number} maxLength Maximum characters per chunk.
     * @returns {string[]} Chunks in reading order; empty if `text` has no words.
     */
    function splitTextIntoChunks( text, maxLength ) {
        const tokens = text.split( /\s+/ ).filter( ( token ) => token.length > 0 );
        const result = [];

        let pending = [];
        let pendingLength = 0;

        tokens.forEach( ( token ) => {
            const grownLength = pendingLength + 1 + token.length;

            // Flush the chunk being built when this word would push it past the limit.
            if ( pending.length > 0 && ! ( grownLength <= maxLength ) ) {
                result.push( pending.join( ' ' ) );
                pending = [];
            }

            if ( pending.length === 0 ) {
                pending = [ token ];
                pendingLength = token.length;
            } else {
                pending.push( token );
                pendingLength = grownLength;
            }
        } );

        if ( pending.length > 0 ) {
            result.push( pending.join( ' ' ) );
        }
        return result;
    }

    /**
     * Word-wrap one chunk of subtitle text into at most `maxLines` lines.
     *
     * Lines are filled greedily up to `maxCharsPerLine` characters. Once the
     * final permitted line has been started, every remaining word is appended
     * to it in reading order, even if that makes the line longer than
     * `maxCharsPerLine`, so no text is lost and the word order is preserved.
     * (The earlier implementation held the first overflow word back and
     * appended it after the words that followed it, scrambling the text.)
     *
     * @param {string} textChunk Text to wrap; any run of whitespace separates words.
     * @param {number} maxCharsPerLine Preferred maximum characters per line.
     * @param {number} [maxLines=MAX_LINES_PER_SRT] Maximum number of lines to emit.
     * @returns {string} Lines joined with "\n"; '' if there are no words or no
     *                   lines are permitted.
     */
    function formatTextForSrt( textChunk, maxCharsPerLine, maxLines = MAX_LINES_PER_SRT ) {
        const words = textChunk.split( /\s+/ ).filter( ( w ) => w.length > 0 );
        if ( words.length === 0 || maxLines < 1 ) return '';

        const lines = [];
        let currentLine = '';

        for ( const word of words ) {
            if ( currentLine.length === 0 ) {
                currentLine = word;
                continue;
            }

            const fits = currentLine.length + 1 + word.length <= maxCharsPerLine;
            // `lines` holds only completed lines, so when it is one short of the
            // limit, `currentLine` is the last line we are allowed to produce.
            const onLastLine = lines.length === maxLines - 1;

            if ( fits || onLastLine ) {
                currentLine = `${ currentLine } ${ word }`;
            } else {
                lines.push( currentLine );
                currentLine = word; // Start new line
            }
        }

        // `currentLine` always holds at least one word at this point.
        lines.push( currentLine );
        return lines.join( '\n' );
    }

    // Line-oriented reader over stdin. `terminal: false` makes readline treat
    // the input as a plain stream, which is what piped input needs.
    const rl = readline.createInterface( {
        terminal: false,
        input: process.stdin,
    } );

    // Transcript entries collected so far, as { timeStr, text } in input order.
    const rawEntries = [];

    // Keep only lines shaped like "<M:SS or MM:SS> <text>"; ignore everything else.
    rl.on( 'line', ( line ) => {
        const parsed = /^(\d{1,2}:\d{2})\s+(.*)$/.exec( line );
        if ( parsed === null ) {
            return;
        }

        const [ , timeStr, spoken ] = parsed;
        const text = spoken.trim();

        // A timestamp with nothing after it carries no subtitle text.
        if ( text.length === 0 ) {
            return;
        }

        rawEntries.push( { timeStr, text } );
    } );

    // Once all input has been read, turn the collected transcript entries into
    // SRT text and write it to stdout in a single write.
    //
    // Each transcript entry ("segment") runs from its own timestamp to the next
    // segment's timestamp. Its text is split into subtitle-sized chunks, and the
    // segment's time span is divided between those chunks in proportion to each
    // chunk's ideal reading duration.
    rl.on( 'close', () => {
    // Convert timestamps to seconds and order the segments chronologically.
    const segments = rawEntries
    .map( ( entry ) => ( {
    timeSeconds: parseTime( entry.timeStr ),
    text: entry.text,
    } ) )
    .sort( ( a, b ) => a.timeSeconds - b.timeSeconds ); // Sort by time

    // Flat list of output lines: index, time range, text, blank separator.
    const srtOutput = [];
    let subtitleIndex = 1;

    for ( let i = 0; i < segments.length; i++ ) {
    const currentSegment = segments[ i ];
    const startTime = currentSegment.timeSeconds;
    const text = currentSegment.text;

    // A segment ends where the next one starts.
    let endTimeOverallSegment;
    if ( i + 1 < segments.length ) {
    endTimeOverallSegment = segments[ i + 1 ].timeSeconds;
    } else {
    // For the last segment, estimate duration based on text length
    const estimatedDuration = Math.max(
    MIN_IDEAL_DURATION_SRT,
    text.length / CHARS_PER_SECOND
    );
    endTimeOverallSegment = startTime + estimatedDuration;
    }

    // Handle cases where timestamps might be out of order or too close
    // (segments are sorted above, so in practice this means the next segment
    // has the same timestamp). Fall back to a reading-speed estimate.
    if ( endTimeOverallSegment <= startTime ) {
    const fallbackDuration = Math.max(
    MIN_IDEAL_DURATION_SRT,
    text.length / CHARS_PER_SECOND
    );
    endTimeOverallSegment = startTime + fallbackDuration;
    }

    // Break the segment text into pieces that each fit one subtitle.
    const chunks = splitTextIntoChunks( text, MAX_CHARS_PER_SRT_BLOCK );
    if ( chunks.length === 0 ) continue;

    // Ideal on-screen time per chunk: reading-speed estimate clamped to
    // [MIN_IDEAL_DURATION_SRT, MAX_IDEAL_DURATION_SRT]. These act as weights only.
    const idealDurations = chunks.map( ( ch ) =>
    Math.max(
    MIN_IDEAL_DURATION_SRT,
    Math.min( MAX_IDEAL_DURATION_SRT, ch.length / CHARS_PER_SECOND )
    )
    );
    const sumIdealDurations = idealDurations.reduce( ( sum, d ) => sum + d, 0 );

    const availableDurationForSegment = endTimeOverallSegment - startTime;
    // Time cursor: where the next chunk of this segment begins.
    let currentChunkTime = startTime;

    for ( let j = 0; j < chunks.length; j++ ) {
    const chunkText = chunks[ j ];
    let durationForThisChunk;

    if ( sumIdealDurations === 0 ) {
    // Fallback if all ideal durations are zero
    // (only possible if MIN_IDEAL_DURATION_SRT is not positive): share equally.
    durationForThisChunk = availableDurationForSegment / chunks.length;
    } else {
    // Share of the segment's time proportional to this chunk's weight.
    durationForThisChunk =
    ( idealDurations[ j ] / sumIdealDurations ) * availableDurationForSegment;
    }
    durationForThisChunk = Math.max( 0, durationForThisChunk ); // Ensure non-negative

    let chunkStartTime = currentChunkTime;
    let chunkEndTime = chunkStartTime + durationForThisChunk;

    // Ensure the last chunk of a segment aligns with the segment's overall end time
    // (overrides any accumulated floating-point drift from the proportional split).
    if ( j === chunks.length - 1 ) {
    chunkEndTime = endTimeOverallSegment;
    }

    // Ensure start < end for a valid subtitle. If not, try to give a minimal duration or skip.
    if ( chunkEndTime <= chunkStartTime ) {
    if ( chunkStartTime < endTimeOverallSegment ) {
    // If there's any room at all
    chunkEndTime = Math.min( chunkStartTime + 0.001, endTimeOverallSegment ); // Minimal 1ms duration
    } else {
    // No room for this chunk, skip it.
    currentChunkTime = chunkEndTime; // Advance time cursor
    continue;
    }
    }

    // Wrap the chunk into display lines.
    const formattedText = formatTextForSrt( chunkText, MAX_CHARS_PER_LINE_SRT );
    if ( formattedText.length === 0 ) {
    // Skip if formatting results in no text
    currentChunkTime = chunkEndTime; // Advance time cursor
    continue;
    }

    // Emit one SRT cue: sequence number, "start --> end", text, blank line.
    // The index only advances for cues that are actually emitted.
    srtOutput.push( String( subtitleIndex++ ) );
    srtOutput.push(
    `${ formatSrtTime( chunkStartTime ) } --> ${ formatSrtTime( chunkEndTime ) }`
    );
    srtOutput.push( formattedText );
    srtOutput.push( '' ); // Blank line separator

    currentChunkTime = chunkEndTime; // Next chunk starts where this one ended
    }
    }

    // Write nothing at all when no cues were produced.
    if ( srtOutput.length > 0 ) {
    process.stdout.write( srtOutput.join( '\n' ) );
    }
    } );