Skip to content

Instantly share code, notes, and snippets.

@panw3i
Forked from lhysrc/baidu_yuyin.py
Created August 1, 2017 12:39
Show Gist options
  • Save panw3i/c4a9ccaba6daed4645a08984f4ded6ac to your computer and use it in GitHub Desktop.
Save panw3i/c4a9ccaba6daed4645a08984f4ded6ac to your computer and use it in GitHub Desktop.
百度语音API,包括语音转换和语音识别。
#coding:utf-8
# Text to be synthesized into speech.
TEXT = "垂死病中惊坐起,笑问客从何处来"
# Path of the audio file to send for recognition.
AUDIO_FILE = "./f.wav"

# Client identifier sent in the "cuid" field of the API requests.
cuid = 'py'

# Application credentials; the "..." values are placeholders to replace.
apiKey = "..."
secretKey = "..."

# OAuth client-credentials URL from which the access token is fetched.
auth_url = "".join([
    "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=",
    apiKey,
    "&client_secret=",
    secretKey,
])
import urllib2
import json
import zlib
import base64
import urllib,httplib
import speech_recognition as sr # pip install SpeechRecognition PyAudio
def getJson(url):
    """Fetch *url* and return its body parsed as JSON.

    The request advertises gzip support; a gzip-encoded response body is
    decompressed before it is parsed.

    Args:
        url: Address to request.

    Returns:
        The value produced by ``json.loads`` on the response body.

    Raises:
        urllib2.URLError: if the request fails.
        ValueError: if the body is not valid JSON.
    """
    request = urllib2.Request(url)
    request.add_header('Accept-encoding', 'gzip')
    response = urllib2.build_opener().open(request)
    try:
        body = response.read()
        encoding = response.headers.get('Content-Encoding') or ''
    finally:
        # urllib2 responses are not context managers; close explicitly so
        # the connection is released even if reading fails.
        response.close()
    # Only gunzip when the server actually sent gzip. Any other non-empty
    # Content-Encoding (e.g. "identity") must not be handed to zlib.
    if encoding.strip().lower() in ('gzip', 'x-gzip'):
        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
        body = zlib.decompress(body, 16 + zlib.MAX_WBITS)
    return json.loads(body)
def getToken():
    """Return the access token issued by the OAuth endpoint.

    Returns:
        The "access_token" field of the JSON document fetched from
        ``auth_url``.
    """
    reply = getJson(auth_url)
    return reply['access_token']
# Access token fetched once when the module is loaded; note that this
# performs a network request (via getToken) at import time.
token = getToken()
def tts(text=TEXT, file_name="tts_result.mp3"):
    """Synthesize *text* to speech and save the audio to *file_name*.

    Args:
        text: Text to convert; ``unicode`` values are encoded as UTF-8
            before being sent.
        file_name: Path of the output file, written in binary mode.

    Raises:
        urllib2.URLError: if the HTTP request fails.
        IOError: if the service answers with a JSON error document
            instead of audio, or if the output file cannot be written.
    """
    if isinstance(text, unicode):
        # urllib.urlencode() applies str() to each value, which raises
        # UnicodeEncodeError for non-ASCII unicode objects.
        text = text.encode('utf-8')
    arr = {
        "tex": text,
        "lan": "zh",
        "ctp": 1,
        "tok": token,
        "cuid": cuid,
    }
    req = urllib2.Request(url="http://tsn.baidu.com/text2audio",
                          data=urllib.urlencode(arr))
    res_data = urllib2.urlopen(req)
    try:
        content_type = res_data.headers.get('Content-Type') or ''
        res = res_data.read()
    finally:
        # Release the connection even if reading the body fails.
        res_data.close()
    # On failure the service replies with a JSON error body rather than
    # audio; do not silently save that as an audio file.
    if content_type.strip().lower().startswith('application/json'):
        raise IOError("text2audio request failed: " + res)
    with open(file_name, 'wb') as f:
        f.write(res)
def recognizer(audio_file=AUDIO_FILE, rate=8000):
    """Send a WAV file to the speech-recognition endpoint.

    Args:
        audio_file: Path of the WAV file to upload.
        rate: Sample rate of the audio in Hz, sent in the "rate" field.
            Defaults to 8000, the rate used by ``record``.

    Returns:
        The raw response body as returned by the server (not parsed).

    Raises:
        IOError: if the audio file cannot be read.
        urllib2.URLError: if the HTTP request fails.
    """
    with open(audio_file, "rb") as f:
        audio = f.read()
    payload = {
        "format": "wav",
        "rate": rate,
        "channel": 1,
        # Use the token fetched at module load rather than a hard-coded
        # (and long since expired) literal.
        "token": token,
        "cuid": cuid,
        # "len" is the size of the raw audio, not of its base64 form.
        "len": len(audio),
        "speech": base64.b64encode(audio),
    }
    req = urllib2.Request("http://vop.baidu.com/server_api",
                          data=json.dumps(payload),
                          headers={"Content-type": "application/json"})
    resp = urllib2.urlopen(req)
    try:
        return resp.read()
    finally:
        # Release the connection whether or not the read succeeds.
        resp.close()
def record(file_name):
    """Capture audio from the microphone and store it as a WAV file.

    Args:
        file_name: Path of the WAV file to write.
    """
    listener = sr.Recognizer()
    # The sample rate can be specified here; the default is 16000.
    with sr.Microphone(sample_rate=8000) as mic:
        print("Say something!")
        captured = listener.listen(mic)
    with open(file_name, "wb") as out:
        # Save the captured audio as WAV data.
        out.write(captured.get_wav_data())
if __name__ == '__main__':
    # Alternative entry points, currently disabled:
    # record("r.wav")
    # tts()
    result = recognizer("r.wav")
    print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment