|
|
@@ -0,0 +1,179 @@ |
|
|
var request = require('request'); |
|
|
var fs = require('fs'); |
|
|
var sox = require('sox'); |
|
|
var spawn = require('child_process').spawn; |
|
|
|
|
|
// Endpoint that audio is POSTed to by send_to_watson.
var url = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize';

// Service credentials, handed to the request as its `auth` option.
// Both are blank here and have to be filled in before the service can be called.
var user = '';
var pass = '';
|
|
|
|
|
|
|
|
|
|
|
/**
 * Convert a media file to a mono, 16 kHz, 16-bit PCM WAV file with ffmpeg.
 *
 * The result is written next to the input as `<file>.temp.wav`. When that
 * file already exists it is reused and ffmpeg is not started.
 *
 * @param {string} file - Path of the media file to convert.
 * @param {function(string)} cb - Called with the path of the WAV file once it
 *   is available. It is NOT called when ffmpeg cannot be started or exits
 *   with a non-zero code; the failure is logged instead.
 */
function convertToWav(file, cb) {
  var wavPath = file + '.temp.wav';

  // fs.exists is deprecated; fs.access (default mode F_OK) is the supported
  // way to ask "is this path there?".
  fs.access(wavPath, function(accessErr) {
    if (!accessErr) {
      cb(wavPath);
      return;
    }

    var spawnFailed = false;
    var ffmpegArgs = ['-i', file, '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000', wavPath];

    // Nothing reads ffmpeg's output, so do not pipe it: an unread pipe can
    // fill up and block the child before it finishes.
    var convert = spawn('ffmpeg', ffmpegArgs, { stdio: 'ignore' });

    // Without a listener, a failure to start ffmpeg (e.g. binary not
    // installed) would surface as an uncaught 'error' event.
    convert.on('error', function(err) {
      spawnFailed = true;
      console.log(err);
    });

    convert.on('close', function(code) {
      if (spawnFailed) {
        // Already reported by the 'error' listener.
        return;
      }

      if (code !== 0) {
        console.log('ffmpeg exited with code ' + code + ' while converting ' + file);
        // Best effort: remove partial output so a later run does not mistake
        // it for a finished conversion and reuse it.
        fs.unlink(wavPath, function() {});
        return;
      }

      cb(wavPath);
    });
  });
}
|
|
|
|
|
|
|
|
/**
 * Split an audio file into chunks of at most five minutes using sox.
 *
 * The duration reported by `sox.identify` is compared against the chunk
 * length. Files that are not longer than one chunk are passed through
 * untouched; longer files are cut with `sox <file> <part> trim <start> <len>`
 * into parts named `<file>.<start>.wav`.
 *
 * @param {string} file - Path of the audio file to split.
 * @param {function(Array<{name: string, offset: number}>)} cb - Called once
 *   with one entry per chunk: `name` is the chunk's path and `offset` is the
 *   chunk's start position within the original file. Entries are in the
 *   order the sox processes finished, not necessarily in offset order.
 *   Not called when `sox.identify` reports an error (the error is logged).
 */
function split(file, cb) {
  // split into 5 minute chunks
  // (assumes sox reports the duration in seconds -- TODO confirm)
  var maxLength = 60 * 5;
  var total = 0;
  var files = [];

  sox.identify(file, function(err, results) {
    if (err) {
      // `results` is not usable here; bail out instead of crashing on it.
      console.log(err);
      return;
    }

    var duration = results.duration;

    if (duration > maxLength) {
      for (var start = 0; start < duration; start += maxLength) {
        total++;
        var filePart = file + '.' + start + '.wav';
        var args = [file, filePart, 'trim', String(start), String(maxLength)];
        var s = spawn('sox', args);
        // Bind the chunk's own name/start now; `filePart` and `start` are
        // function-scoped and will have moved on by the time 'close' fires.
        s.on('close', finishedSplit.bind(null, filePart, start));
      }
    } else {
      // Nothing to cut: hand back the input file itself as the only chunk.
      cb([{
        name: file,
        offset: 0
      }]);
    }
  });

  // Records one finished chunk and fires `cb` after the last one completes.
  function finishedSplit(filename, start) {
    files.push({
      name: filename,
      offset: start
    });
    total--;
    if (total === 0) {
      cb(files);
    }
  }
}
|
|
|
|
|
|
|
|
/**
 * Read an audio file and POST it to the speech-to-text endpoint in `url`.
 *
 * The file contents are sent as the request body with content type
 * `audio/wav`, authenticated with the module-level `user` / `pass`, and with
 * the query parameters `timestamps`, `continuous` and `inactivity_timeout`
 * set as shown below.
 *
 * @param {string} file - Path of the WAV file to upload.
 * @param {function(Object)} [callback] - Called with the response body parsed
 *   as JSON. It is NOT called when the file cannot be read, the request
 *   fails, or the response body is not valid JSON; each of those is logged.
 */
function send_to_watson(file, callback) {
  fs.readFile(file, function(error, data) {
    if (error) {
      console.log(error);
      return false;
    }

    var options = {
      headers: {
        'Content-Type': 'audio/wav',
        'Transfer-Encoding': 'chunked',
      },
      qs: {
        timestamps: 'true',
        continuous: 'true',
        inactivity_timeout: '-1'
      },
      auth: {
        user: user,
        pass: pass
      },
      body: data
    };

    request.post(url, options, function(error, response, body) {
      if (error) {
        // No usable body on a transport error; parsing it would throw.
        console.log(error);
        return;
      }

      if (typeof callback === 'undefined') {
        return;
      }

      var result;
      try {
        result = JSON.parse(body);
      } catch (parseError) {
        // The service answered with something that is not JSON.
        console.log(parseError);
        return;
      }

      callback(result);
    });
  });
}
|
|
|
|
|
|
|
|
/**
 * Reduce a recognition response to a list of sentences with word timings.
 *
 * Only the first alternative of each entry in `data.results` is used. Each
 * of its `timestamps` entries is read as `[word, start, end]`, and `offset`
 * is added to both `start` and `end`.
 *
 * @param {Object} data - Response object with a `results` array.
 * @param {number} [offset] - Amount added to every word time; 0 when omitted.
 * @returns {{sentences: Array<{transcript: *, confidence: *, words: Array<{word: *, start: number, end: number}>}>}}
 */
function parse(data, offset) {
  if (offset === undefined) {
    offset = 0;
  }

  // [word, start, end] -> {word, start, end}, shifted by the offset.
  function toWord(stamp) {
    return {
      word: stamp[0],
      start: stamp[1] + offset,
      end: stamp[2] + offset
    };
  }

  // One result -> one sentence, taken from its first alternative.
  function toSentence(result) {
    var best = result.alternatives[0];
    return {
      transcript: best.transcript,
      confidence: best.confidence,
      words: best.timestamps.map(toWord)
    };
  }

  return {
    sentences: data.results.map(toSentence)
  };
}
|
|
|
|
|
|
|
|
/**
 * Merge per-chunk transcriptions and write them to `<file>.transcription.json`.
 *
 * Entries are ordered by `f.offset`, their `data.sentences` arrays are
 * concatenated, and the resulting array is written as JSON. After a
 * successful write, when there was more than one entry, every chunk file
 * (`f.name`) is deleted. A single entry is left on disk.
 *
 * @param {string} file - Base path; the output path is this plus
 *   `.transcription.json`.
 * @param {Array<{f: {name: string, offset: number}, data: {sentences: Array}}>} data
 *   One entry per transcribed chunk. The array itself is not modified.
 * @param {function(?Error)} [cb] - Optional. Called with the write error, or
 *   with `null` once the file is written and chunk cleanup has finished.
 *   Failures to delete individual chunk files are logged, not reported here.
 */
function writeOut(file, data, cb) {
  var done = typeof cb === 'function' ? cb : function() {};

  // Sort a copy so the caller's array keeps its order.
  var ordered = data.slice().sort(function(a, b) {
    return a.f.offset - b.f.offset;
  });

  var sentences = ordered.reduce(function(all, d) {
    return all.concat(d.data.sentences);
  }, []);

  fs.writeFile(file + '.transcription.json', JSON.stringify(sentences), function(writeErr) {
    if (writeErr) {
      // Keep the chunk files: the transcription was not saved.
      console.log(writeErr);
      done(writeErr);
      return;
    }

    if (ordered.length <= 1) {
      done(null);
      return;
    }

    var pending = ordered.length;
    ordered.forEach(function(d) {
      // fs.unlink requires a callback; calling it without one throws.
      fs.unlink(d.f.name, function(unlinkErr) {
        if (unlinkErr) {
          console.log(unlinkErr);
        }
        pending--;
        if (pending === 0) {
          done(null);
        }
      });
    });
  });
}

/**
 * Transcribe a media file end to end.
 *
 * Pipeline: convertToWav -> split -> send_to_watson for every chunk -> parse
 * each response with the chunk's offset -> writeOut once every chunk has
 * reported back. The result is written to `<file>.transcription.json`.
 *
 * @param {string} file - Path of the media file to transcribe.
 * @param {function(?Error)} [cb] - Optional. Called when writeOut has
 *   finished, with the write error or `null`. It is not called if an earlier
 *   stage never invokes its own callback.
 */
function transcribe(file, cb) {
  var out = [];
  var total = 0;

  convertToWav(file, function(newFile) {
    split(newFile, function(files) {
      // Countdown of chunks still waiting for a response.
      total = files.length;

      files.forEach(function(f) {
        send_to_watson(f.name, function(data) {
          out.push({
            f: f,
            data: parse(data, f.offset)
          });

          total--;
          if (total === 0) {
            writeOut(file, out, cb);
          }
        });
      });
    });
  });
}
|
|
|
|
|
// Command-line entry point: `node <this script> <media-file>`.
// Only runs when this file is executed directly. Without the guard, merely
// require()-ing the module would start a transcription of whatever the host
// program received as its own argv[2] (or of `undefined` when there is none).
if (require.main === module) {
  var file = process.argv[2];

  if (file) {
    transcribe(file);
  } else {
    console.log('Usage: node ' + process.argv[1] + ' <media-file>');
  }
}

module.exports = transcribe;