Last active
December 28, 2023 15:14
-
-
Save lukehoban/0ee5c1bef438dc5bd7cb to your computer and use it in GitHub Desktop.
Revisions
-
lukehoban revised this gist
Jul 11, 2015 . 1 changed file with 79 additions and 79 deletions. There are no files selected for viewing
// Diff hunk @@ -13,94 +13,94 @@ of the gist script. The `require` calls for
// `fs`, `util` and `request`, and the `clientId` / `clientSecret` / `str`
// declarations, sit above this hunk in the file.
console.log('Input text: "' + str + '"');

// Round trip: fetch a token, synthesize `str` to test.wav, then recognize
// test.wav and print the top result.
getAccessToken(clientId, clientSecret, function(err, accessToken) {
  if (err) return console.log(err);
  console.log('Got access token: ' + accessToken);
  textToSpeech(str, 'test.wav', accessToken, function(err) {
    if (err) return console.log(err);
    console.log('Wrote out: ' + 'test.wav');
    speechToText('test.wav', accessToken, function(err, res) {
      if (err) return console.log(err);
      // The response may carry no results; guard instead of throwing a
      // TypeError on `res.results[0]`.
      var best = res && res.results && res.results[0];
      if (!best) {
        return console.log('No recognition result returned: ' + JSON.stringify(res));
      }
      console.log('Confidence ' + best.confidence + ' for: "' + best.lexical + '"');
    });
  });
});

// ==== Helpers ====

/**
 * Requests an access token from the token endpoint using the
 * client-credentials grant.
 *
 * @param {string} clientId - Client identifier sent as `client_id`.
 * @param {string} clientSecret - API key sent as `client_secret`.
 * @param {function(*, string=)} callback - Called exactly once with
 *     `(err)` on failure or `(null, accessToken)` on success.
 */
function getAccessToken(clientId, clientSecret, callback) {
  request.post({
    url: 'https://oxford-speech.cloudapp.net/token/issueToken',
    // `form` URL-encodes the values itself, so they are passed raw here;
    // pre-encoding them would double-encode any reserved characters.
    form: {
      'grant_type': 'client_credentials',
      'client_id': clientId,
      'client_secret': clientSecret,
      'scope': 'https://speech.platform.bing.com'
    }
  }, function(err, resp, body) {
    if (err) return callback(err);
    var accessToken;
    // Only the parse is guarded: invoking the callback inside the try would
    // re-invoke it from the catch if the callback itself threw.
    try {
      accessToken = JSON.parse(body).access_token;
    } catch (e) {
      return callback(e);
    }
    if (!accessToken) {
      return callback(new Error('Token request failed: ' + body));
    }
    callback(null, accessToken);
  });
}

/**
 * Escapes the five XML special characters so arbitrary text can be embedded
 * in the SSML document without breaking its markup.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text safe to place inside an XML element.
 */
function escapeXml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Synthesizes `text` to speech and writes the returned audio to `filename`.
 *
 * @param {string} text - Text to speak.
 * @param {string} filename - Path of the audio file to write.
 * @param {string} accessToken - Bearer token from `getAccessToken`.
 * @param {function(*)} callback - Called with `(err)` on failure or
 *     `(null)` once the file has been written.
 */
function textToSpeech(text, filename, accessToken, callback) {
  var ssmlTemplate = "<speak version='1.0' xml:lang='en-us'><voice xml:lang='%s' xml:gender='%s' name='%s'>%s</voice></speak>";
  request.post({
    // HTTPS, matching the recognize call, so the bearer token is not sent
    // in clear text.
    url: 'https://speech.platform.bing.com/synthesize',
    body: util.format(ssmlTemplate, 'en-US', 'Female', 'Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)', escapeXml(text)),
    encoding: null, // keep the response body as a Buffer of raw audio bytes
    headers: {
      'Authorization': 'Bearer ' + accessToken,
      'Content-Type' : 'application/ssml+xml',
      'X-Microsoft-OutputFormat' : 'riff-16khz-16bit-mono-pcm',
      'X-Search-AppId': '07D3234E49CE426DAA29772419F436CA',
      'X-Search-ClientID': '1ECFAE91408841A480F00935DC390960'
    }
  }, function(err, resp, body) {
    if (err) return callback(err);
    // Without this check an HTTP error body would be saved as the audio file.
    if (resp.statusCode < 200 || resp.statusCode >= 300) {
      return callback(new Error('Synthesize request failed with HTTP ' + resp.statusCode + ': ' + body));
    }
    fs.writeFile(filename, body, 'binary', function (err) {
      if (err) return callback(err);
      callback(null);
    });
  });
}

/**
 * Reads the audio file `filename`, posts it to the recognition endpoint and
 * returns the parsed JSON response.
 *
 * @param {string} filename - Path of the audio file to recognize.
 * @param {string} accessToken - Bearer token from `getAccessToken`.
 * @param {function(*, Object=)} callback - Called exactly once with
 *     `(err)` on failure or `(null, parsedResponse)` on success.
 */
function speechToText(filename, accessToken, callback) {
  fs.readFile(filename, function(err, waveData) {
    if (err) return callback(err);
    request.post({
      url: 'https://speech.platform.bing.com/recognize/query',
      qs: {
        'scenarios': 'ulm',
        'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5', // This magic value is required
        'locale': 'en-US',
        'device.os': 'wp7',
        'version': '3.0',
        'format': 'json',
        'requestid': '1d4b6030-9099-11e0-91e4-0800200c9a66', // can be anything
        'instanceid': '1d4b6030-9099-11e0-91e4-0800200c9a66' // can be anything
      },
      body: waveData,
      headers: {
        'Authorization': 'Bearer ' + accessToken,
        'Content-Type': 'audio/wav; samplerate=16000',
        'Content-Length' : waveData.length
      }
    }, function(err, resp, body) {
      if (err) return callback(err);
      var parsed;
      // Parse first, call back afterwards, so an exception thrown by the
      // callback cannot trigger a second invocation from the catch.
      try {
        parsed = JSON.parse(body);
      } catch (e) {
        return callback(e);
      }
      callback(null, parsed);
    });
  });
}
lukehoban revised this gist
Jul 11, 2015 . 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -11,8 +11,8 @@ console.log('Converting from text -> speech -> text.'); console.log('Input text: "' + str + '"'); getAccessToken(clientId, clientSecret, function(err, accessToken) { if(err) return console.log(err); console.log('Got access token: ' + accessToken) textToSpeech(str, 'test.wav', accessToken, function(err) { if(err) return console.log(err); -
lukehoban created this gist
Jul 11, 2015 . There are no files selected for viewing
// Demo script: text -> speech -> text round trip against the Microsoft
// speech REST endpoints, using the `request` HTTP client.
var fs = require('fs');
var util = require('util');
var request = require('request');

var clientId = 'test-app'; // Can be anything
// NOTE(review): an API key is committed in source here; consider rotating it
// and loading it from configuration instead.
var clientSecret = 'f6f0bfec08274b8790520a9079b808af'; // API key from Azure marketplace

var str = 'This is a cool demo to call Microsoft text to speach service in Node.js.';

console.log('Converting from text -> speech -> text.');
console.log('Input text: "' + str + '"');

// Round trip: fetch a token, synthesize `str` to test.wav, then recognize
// test.wav and print the top result.
getAccessToken(clientId, clientSecret, function(err, accessToken) {
  if (err) return console.log(err);
  console.log('Got access token: ' + accessToken);
  textToSpeech(str, 'test.wav', accessToken, function(err) {
    if (err) return console.log(err);
    console.log('Wrote out: ' + 'test.wav');
    speechToText('test.wav', accessToken, function(err, res) {
      if (err) return console.log(err);
      // The response may carry no results; guard instead of throwing a
      // TypeError on `res.results[0]`.
      var best = res && res.results && res.results[0];
      if (!best) {
        return console.log('No recognition result returned: ' + JSON.stringify(res));
      }
      console.log('Confidence ' + best.confidence + ' for: "' + best.lexical + '"');
    });
  });
});

// ==== Helpers ====

/**
 * Requests an access token from the token endpoint using the
 * client-credentials grant.
 *
 * @param {string} clientId - Client identifier sent as `client_id`.
 * @param {string} clientSecret - API key sent as `client_secret`.
 * @param {function(*, string=)} callback - Called exactly once with
 *     `(err)` on failure or `(null, accessToken)` on success.
 */
function getAccessToken(clientId, clientSecret, callback) {
  request.post({
    url: 'https://oxford-speech.cloudapp.net/token/issueToken',
    // `form` URL-encodes the values itself, so they are passed raw here;
    // pre-encoding them would double-encode any reserved characters.
    form: {
      'grant_type': 'client_credentials',
      'client_id': clientId,
      'client_secret': clientSecret,
      'scope': 'https://speech.platform.bing.com'
    }
  }, function(err, resp, body) {
    if (err) return callback(err);
    var accessToken;
    // Only the parse is guarded: invoking the callback inside the try would
    // re-invoke it from the catch if the callback itself threw.
    try {
      accessToken = JSON.parse(body).access_token;
    } catch (e) {
      return callback(e);
    }
    if (!accessToken) {
      return callback(new Error('Token request failed: ' + body));
    }
    callback(null, accessToken);
  });
}

/**
 * Escapes the five XML special characters so arbitrary text can be embedded
 * in the SSML document without breaking its markup.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text safe to place inside an XML element.
 */
function escapeXml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Synthesizes `text` to speech and writes the returned audio to `filename`.
 *
 * @param {string} text - Text to speak.
 * @param {string} filename - Path of the audio file to write.
 * @param {string} accessToken - Bearer token from `getAccessToken`.
 * @param {function(*)} callback - Called with `(err)` on failure or
 *     `(null)` once the file has been written.
 */
function textToSpeech(text, filename, accessToken, callback) {
  var ssmlTemplate = "<speak version='1.0' xml:lang='en-us'><voice xml:lang='%s' xml:gender='%s' name='%s'>%s</voice></speak>";
  request.post({
    // HTTPS, matching the recognize call, so the bearer token is not sent
    // in clear text.
    url: 'https://speech.platform.bing.com/synthesize',
    body: util.format(ssmlTemplate, 'en-US', 'Female', 'Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)', escapeXml(text)),
    encoding: null, // keep the response body as a Buffer of raw audio bytes
    headers: {
      'Authorization': 'Bearer ' + accessToken,
      'Content-Type' : 'application/ssml+xml',
      'X-Microsoft-OutputFormat' : 'riff-16khz-16bit-mono-pcm',
      'X-Search-AppId': '07D3234E49CE426DAA29772419F436CA',
      'X-Search-ClientID': '1ECFAE91408841A480F00935DC390960'
    }
  }, function(err, resp, body) {
    if (err) return callback(err);
    // Without this check an HTTP error body would be saved as the audio file.
    if (resp.statusCode < 200 || resp.statusCode >= 300) {
      return callback(new Error('Synthesize request failed with HTTP ' + resp.statusCode + ': ' + body));
    }
    fs.writeFile(filename, body, 'binary', function (err) {
      if (err) return callback(err);
      callback(null);
    });
  });
}

/**
 * Reads the audio file `filename`, posts it to the recognition endpoint and
 * returns the parsed JSON response.
 *
 * @param {string} filename - Path of the audio file to recognize.
 * @param {string} accessToken - Bearer token from `getAccessToken`.
 * @param {function(*, Object=)} callback - Called exactly once with
 *     `(err)` on failure or `(null, parsedResponse)` on success.
 */
function speechToText(filename, accessToken, callback) {
  fs.readFile(filename, function(err, waveData) {
    if (err) return callback(err);
    request.post({
      url: 'https://speech.platform.bing.com/recognize/query',
      qs: {
        'scenarios': 'ulm',
        'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5', // This magic value is required
        'locale': 'en-US',
        'device.os': 'wp7',
        'version': '3.0',
        'format': 'json',
        'requestid': '1d4b6030-9099-11e0-91e4-0800200c9a66', // can be anything
        'instanceid': '1d4b6030-9099-11e0-91e4-0800200c9a66' // can be anything
      },
      body: waveData,
      headers: {
        'Authorization': 'Bearer ' + accessToken,
        'Content-Type': 'audio/wav; samplerate=16000',
        'Content-Length' : waveData.length
      }
    }, function(err, resp, body) {
      if (err) return callback(err);
      var parsed;
      // Parse first, call back afterwards, so an exception thrown by the
      // callback cannot trigger a second invocation from the catch.
      try {
        parsed = JSON.parse(body);
      } catch (e) {
        return callback(e);
      }
      callback(null, parsed);
    });
  });
}