Forked from dannguyen/EXAMPLE_WATSON_API_README.md
Created
February 25, 2016 16:30
-
-
Save prabhjotSL/6e25ba87e3b937ea64af to your computer and use it in GitHub Desktop.
Transcribing ProPublica podcast with Python and Watson Speech to Text API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Transcribe a ProPublica podcast with the Watson Speech to Text API.

Pipeline (runs top to bottom when the script is executed):

1. Download the podcast MP3, unless AUDIO_FILENAME already exists.
2. Split the MP3 into WAV segments of AUDIO_SEGMENT_SECONDS each.
3. POST every WAV segment to Watson and save the JSON response next to it,
   skipping segments whose JSON file already exists.
4. Print the best transcript alternative of every saved response.
"""
from pydub import AudioSegment
from glob import glob
from math import ceil
from os.path import basename, splitext, exists
import json
import requests

WATSON_USERNAME = "YOUR-USERNAME-HERE"
WATSON_PASSWORD = "YOUR-PASSWORD"
WATSON_ENDPOINT = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize'
WATSON_DEFAULT_PARAMS = {
    'continuous': True,
    'timestamps': True,
    'word_confidence': True,
}
WATSON_DEFAULT_HEADERS = {
    'content-type': 'audio/wav'
}
# via: http://www.propublica.org/podcast/item/how-a-reporter-pierced-the-hype-behind-theranos/
DOWNLOAD_URL = 'https://api.soundcloud.com/tracks/247345268/download?client_id=cUa40O3Jg3Emvp6Tv4U6ymYYO50NUGpJ'
AUDIO_FILENAME = 'podcast.mp3'
AUDIO_SEGMENT_SECONDS = 300

# Download the podcast once; later runs reuse the local copy.
if not exists(AUDIO_FILENAME):
    print("Downloading from", DOWNLOAD_URL)
    resp = requests.get(DOWNLOAD_URL)
    # Fail loudly instead of saving an HTTP error page as the MP3.
    resp.raise_for_status()
    with open(AUDIO_FILENAME, 'wb') as w:
        w.write(resp.content)
    print("Wrote audio file to", AUDIO_FILENAME)

# Convert to WAV, one file per AUDIO_SEGMENT_SECONDS-long slice.
audio = AudioSegment.from_mp3(AUDIO_FILENAME)
total_seconds = ceil(audio.duration_seconds)
xs = 0
while xs < audio.duration_seconds:
    ys = min(xs + AUDIO_SEGMENT_SECONDS, total_seconds)
    # Zero-padded start/end seconds keep the file names in playback order
    # when sorted lexically, e.g. 00000-00300.wav.
    fname = str(xs).rjust(5, '0') + '-' + str(ys).rjust(5, '0') + '.wav'
    # AudioSegment slices are indexed in milliseconds.
    audio[xs * 1000:ys * 1000].export(fname, format='wav')
    print("Saved", fname)
    xs = ys

# Transcribe each WAV with Watson (sorted so segments go in playback order).
for fname in sorted(glob("*.wav")):
    tname = splitext(basename(fname))[0] + '.json'
    if exists(tname):
        print("Already transcribed", tname)
        continue

    print("Transcribing", fname)
    with open(fname, 'rb') as wav_file:
        watson_response = requests.post(
            WATSON_ENDPOINT,
            data=wav_file,
            auth=(WATSON_USERNAME, WATSON_PASSWORD),
            params=WATSON_DEFAULT_PARAMS,
            headers=WATSON_DEFAULT_HEADERS,
            stream=False
        )

    # Do not cache error payloads: a saved .json file makes the exists()
    # check above skip this segment on every later run.
    if not watson_response.ok:
        print("Transcription failed for", fname,
              "- HTTP status", watson_response.status_code)
        continue

    with open(tname, 'w', encoding='utf-8') as w:
        w.write(watson_response.text)
    print("Wrote transcript to", tname)

# Print out the data
for fname in sorted(glob("*.json")):
    with open(fname, 'r', encoding='utf-8') as f:
        response = json.load(f)
    # The response is a JSON object; the transcribed lines live in the
    # list stored under its 'results' key.
    for result in response.get('results', []):  # each result is a line
        alternatives = result.get('alternatives')
        if alternatives:  # each result may have many alternatives
            # just pick best alternative
            print(alternatives[0]['transcript'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment