@qxj
Created March 24, 2023 07:43
tts_demo.py
#!/usr/bin/env python3
# coding: utf-8

"""
Speech synthesis samples for the Microsoft Cognitive Services Speech SDK
https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/python/console/speech_synthesis_sample.py
"""

import os
import azure.cognitiveservices.speech as speechsdk

# Set up the subscription info for the Speech Service:
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)

# Prebuilt neural voices:
# https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts#prebuilt-neural-voices
speech_config.speech_synthesis_voice_name = 'zh-CN-YunfengNeural'

def speech_synthesis_to_file(text, file_name="outputaudio.wav"):
    """Performs speech synthesis to an audio file."""
    # Creates a speech synthesizer using a file as audio output.
    file_config = speechsdk.audio.AudioOutputConfig(filename=file_name)
    # Supported output formats:
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs
    ext = os.path.splitext(file_name)[1]  # os.path.splitext() returns (root, ext); keep only the extension
    if ext == ".mp3":
        speech_config.set_speech_synthesis_output_format(
            speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=file_config)

    result = speech_synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}], and the audio was saved to [{}]".format(text, file_name))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))


# Sample Chinese text to synthesize, matching the zh-CN voice selected above. It is a short
# summary of DHLCF (dynamic hypergraph collaborative filtering, an Alibaba paper at CIKM 2022),
# which learns the hypergraph structure dynamically at each layer during training and reports
# NDCG@10 gains of 14.91%, 14.67%, and 25.67% on Yelp, Gowalla, and LastFM-2K, respectively.
text = '''众所周知,超图结构有助于图中节点的高阶关系建模,并且有利于建立节点的多种关系,受到众多研究者青睐。然而,与普通图网络类似,超图中静态的启发式拓扑结构与现实中动态演变的图节点关系相悖,限制了超图的学习效果。
针对上述问题,我们来看一篇阿里发表在CIKM2022上发表的文章,动态超图协同过滤。文中提出了一种可微的轻量级多层超图学习器,它可以在训练过程中在不同的层动态地学习超图结构。
论文所提模型DHLCF在Yelp, Gowalla和LastFM-2K数据集上针对NDCG@10上分别取得了14.91%、14.67%和25.67%的改进。
'''
filename = 'tts_demo.wav'
speech_synthesis_to_file(text, filename)
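
The audio_config created near the top of the script (default speaker output) is never used by speech_synthesis_to_file, which always writes to a file. Below is a minimal sketch of a companion helper that plays the synthesized speech through the default speaker instead; the helper name speech_synthesis_to_speaker is our addition, not part of the original gist:

def speech_synthesis_to_speaker(text):
    """Plays synthesized speech through the default speaker (illustrative sketch, not in the original gist)."""
    # Reuse the module-level speech_config and the otherwise-unused audio_config.
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    result = synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.Canceled:
        print("Speech synthesis canceled: {}".format(result.cancellation_details.reason))

In either case the script reads the subscription key and region from the SPEECH_KEY and SPEECH_REGION environment variables, so both must be set before running.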