Skip to content

Instantly share code, notes, and snippets.

@andrei15
Forked from antiboredom/transcribe.js
Created January 14, 2018 17:47
Show Gist options
  • Select an option

  • Save andrei15/fb219374546512d40b67e5deaafdcfb7 to your computer and use it in GitHub Desktop.

Select an option

Save andrei15/fb219374546512d40b67e5deaafdcfb7 to your computer and use it in GitHub Desktop.

Revisions

  1. @antiboredom antiboredom revised this gist Dec 28, 2015. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions transcribe.js
    Original file line number Diff line number Diff line change
    @@ -3,8 +3,8 @@ var fs = require('fs');
    var sox = require('sox');
    var spawn = require('child_process').spawn;

    var user = '';
    var pass = '';
    var WATSON_USER = '';
    var WATSON_PASS = '';
    var url = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize';


    @@ -80,8 +80,8 @@ function send_to_watson(file, callback) {
    inactivity_timeout: '-1'
    },
    auth: {
    user: user,
    pass: pass
    user: WATSON_USER,
    pass: WATSON_PASS
    },
    body: data
    };
  2. @antiboredom antiboredom created this gist Dec 28, 2015.
    179 changes: 179 additions & 0 deletions transcribe.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,179 @@
    var request = require('request');
    var fs = require('fs');
    var sox = require('sox');
    var spawn = require('child_process').spawn;

    // Credentials passed as the request `auth` option in send_to_watson.
    // They are empty placeholders in this file.
    var user = '';
    var pass = '';
    // Endpoint that send_to_watson POSTs the audio data to.
    var url = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize';



    /**
     * Make a WAV version of `file` available at `file + '.temp.wav'` and pass
     * that path to `cb`.
     *
     * When the target path already exists it is reused untouched. Otherwise
     * ffmpeg is spawned to transcode `file` (codec pcm_s16le, 1 channel,
     * 16000 Hz). `cb` fires when the ffmpeg process closes, regardless of its
     * exit status.
     *
     * @param {string} file - Path of the input media file.
     * @param {function(string)} cb - Receives the path of the WAV file.
     */
    function convertToWav(file, cb) {
      var wavPath = file + '.temp.wav';

      fs.exists(wavPath, function(found) {
        // A previous run already produced the WAV: reuse it.
        if (found) {
          cb(wavPath);
          return;
        }

        var ffmpegArgs = [
          '-i', file,
          '-acodec', 'pcm_s16le',
          '-ac', '1',
          '-ar', '16000',
          wavPath
        ];
        var ffmpeg = spawn('ffmpeg', ffmpegArgs);
        ffmpeg.on('close', function() {
          cb(wavPath);
        });
      });
    }


    /**
     * Split an audio file into chunks of at most five minutes using the `sox`
     * command-line tool.
     *
     * The duration is read with `sox.identify`. Files longer than five minutes
     * are cut into `file + '.' + startSeconds + '.wav'` pieces, one `sox ... trim`
     * process per piece, all started at once. Shorter files are passed through
     * unchanged as a single entry.
     *
     * @param {string} file - Path of the audio file to split.
     * @param {function(Array<{name: string, offset: number}>)} cb - Receives one
     *   entry per chunk: the chunk's path and its start offset in seconds. For
     *   split files the entries arrive in process-completion order, not offset
     *   order. Not called if `sox.identify` reports an error.
     */
    function split(file, cb) {
      // split into 5 minute chunks
      var maxLength = 60 * 5;
      var pending = 0;
      var files = [];

      // Records one finished chunk; fires cb once every sox process has closed.
      function finishedSplit(filename, start) {
        files.push({
          name: filename,
          offset: start
        });
        pending--;
        if (pending === 0) {
          cb(files);
        }
      }

      sox.identify(file, function(err, results) {
        if (err) {
          // Without a duration there is nothing sensible to split; report and stop.
          console.log(err);
          return;
        }

        var duration = results.duration;
        if (duration > maxLength) {
          var start = 0;
          while (start < duration) {
            pending++;
            var filePart = file + '.' + start + '.wav';
            var args = [file, filePart, 'trim', '' + start, '' + maxLength];
            var proc = spawn('sox', args);
            proc.on('close', finishedSplit.bind({}, filePart, start));
            start += maxLength;
          }
        } else {
          // Short enough to send in one piece: hand back the input file itself.
          cb([{
            name: file,
            offset: 0
          }]);
        }
      });
    }


    /**
     * Read an audio file from disk and POST it to the speech-to-text endpoint
     * in `url`, authenticating with `user` / `pass`.
     *
     * Failures (unreadable file, request error, response body that is not valid
     * JSON) are logged and `callback` is not invoked.
     *
     * @param {string} file - Path of the WAV file to upload.
     * @param {function(Object)=} callback - Receives the parsed JSON response body.
     */
    function send_to_watson(file, callback) {
      fs.readFile(file, function(readError, data) {
        if (readError) {
          console.log(readError);
          return;
        }

        var options = {
          headers: {
            'Content-Type': 'audio/wav',
            'Transfer-Encoding': 'chunked',
          },
          qs: {
            timestamps: 'true',
            continuous: 'true',
            inactivity_timeout: '-1'
          },
          auth: {
            user: user,
            pass: pass
          },
          body: data
        };

        request.post(url, options, function(requestError, response, body) {
          if (requestError) {
            // There is no usable body on a failed request; parsing it would throw.
            console.log(requestError);
            return;
          }

          if (typeof callback === 'undefined') {
            return;
          }

          var parsed;
          try {
            parsed = JSON.parse(body);
          } catch (parseError) {
            // The response was not JSON; report it instead of crashing.
            console.log(parseError);
            return;
          }
          callback(parsed);
        });
      });
    }


    /**
     * Convert a recognition response into `{ sentences: [...] }`.
     *
     * Only the first alternative of each result is used. Each sentence carries
     * its `transcript`, `confidence`, and a `words` list built from the
     * alternative's `timestamps` triples ([word, start, end]), with `offset`
     * added to every start and end.
     *
     * @param {Object} data - Response object with a `results` array.
     * @param {number=} offset - Amount added to each word time; 0 when omitted.
     * @returns {{sentences: Array<Object>}} The reshaped transcript.
     */
    function parse(data, offset) {
      var shift = (typeof offset === 'undefined') ? 0 : offset;

      // [word, start, end] -> { word, start, end } shifted by the chunk offset.
      function toWord(stamp) {
        return {
          word: stamp[0],
          start: stamp[1] + shift,
          end: stamp[2] + shift
        };
      }

      function toSentence(result) {
        var best = result.alternatives[0];
        return {
          transcript: best.transcript,
          confidence: best.confidence,
          words: best.timestamps.map(toWord)
        };
      }

      return {
        sentences: data.results.map(toSentence)
      };
    }


    /**
     * Merge per-chunk transcripts into one JSON file and remove the chunk files.
     *
     * `data` is sorted in place by chunk offset, the chunks' sentences are
     * concatenated in that order, and the result is written to
     * `file + '.transcription.json'`. When there was more than one chunk, each
     * chunk file (`d.f.name`) is deleted after the write finishes. Write and
     * delete errors are logged.
     *
     * @param {string} file - Base path; the output goes next to it.
     * @param {Array<{f: {name: string, offset: number}, data: {sentences: Array}}>} data
     *   One entry per transcribed chunk.
     */
    function writeOut(file, data) {
      var out = [];

      data = data.sort(function(a, b) {
        return a.f.offset - b.f.offset;
      });

      data.forEach(function(d) {
        out = out.concat(d.data.sentences);
      });

      fs.writeFile(file + '.transcription.json', JSON.stringify(out), function(writeError) {
        if (writeError) {
          console.log(writeError);
        }
        // A single entry is the un-split input itself, so only real chunks are removed.
        if (data.length > 1) {
          data.forEach(function(d) {
            // fs.unlink requires a callback; calling it without one throws on current Node.js.
            fs.unlink(d.f.name, function(unlinkError) {
              if (unlinkError) {
                console.log(unlinkError);
              }
            });
          });
        }
      });
    }


    /**
     * Transcribe a media file end to end: convert it to WAV, split it into
     * chunks, send every chunk for recognition, reshape each response with the
     * chunk's time offset, and write the combined transcript via writeOut.
     *
     * All chunks are submitted at once; a countdown detects when the last
     * response has arrived.
     *
     * @param {string} file - Path of the media file to transcribe.
     * @param {function(Array<{f: Object, data: Object}>)=} cb - Optional. Called
     *   with the per-chunk results right after writeOut has been invoked; the
     *   JSON file write is asynchronous and may not have completed yet.
     */
    function transcribe(file, cb) {
      var out = [];
      var total = 0;

      convertToWav(file, function(newFile) {
        split(newFile, function(files) {
          total = files.length;

          files.forEach(function(f) {
            send_to_watson(f.name, function(data) {
              out.push({
                f: f,
                data: parse(data, f.offset)
              });

              total--;
              if (total === 0) {
                writeOut(file, out);
                // Previously the callback was accepted but never invoked.
                if (typeof cb === 'function') {
                  cb(out);
                }
              }
            });
          });
        });
      });
    }

    module.exports = transcribe;

    // Run the transcription only when this file is executed directly
    // (`node transcribe.js <file>`), so that require()-ing the module does not
    // start a transcription using the host program's arguments.
    if (require.main === module) {
      var file = process.argv[2];
      if (!file) {
        console.error('Usage: node transcribe.js <file>');
        process.exit(1);
      }
      transcribe(file);
    }