File size: 4,230 Bytes
d853483
 
 
 
 
 
80063f9
d853483
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c20f648
d853483
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed93ffe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# -*- coding: utf-8 -*-


import azure.cognitiveservices.speech as speechsdk
import gradio as gr
import io
import os

# Maps the language label offered in the UI dropdown to the locale code handed
# to the Azure Speech SDK.
# NOTE: the name shadows the built-in ``dict``; it is kept because other code
# in this file looks the table up under this name.
dict = {
    "中文": "zh-CN",
    "英语": "en-US",
    "法语": "fr-FR",
    "西班牙语": "es-ES",
    "阿拉伯语": "ar-SA",
    "葡萄牙语": "pt-PT",
    "泰语": "th-TH",
    "越南语": "vi-VN",
    "俄语": "ru-RU",
    "日语": "ja-JP",
    "德语": "de-DE",
    "印度尼西亚语": "id-ID",
    "韩语": "ko-KR",
    "菲律宾语": "fil-PH",
    "意大利语": "it-IT",
    "荷兰语": "nl-NL",
    "波兰语": "pl-PL",
    "瑞典语": "sv-SE",
    "希伯来语": "he-IL",
    "土耳其语": "tr-TR",
    "马来语": "ms-MY",
    "匈牙利语": "hu-HU",
    "希腊语": "el-GR",
    "捷克语": "cs-CZ",
    "丹麦语": "da-DK",
    "挪威语": "nb-NO",
    "芬兰语": "fi-FI",
    "斯洛文尼亚语": "sl-SI",
    "爱沙尼亚语": "et-EE",
    "拉脱维亚语": "lv-LV",
    "立陶宛语": "lt-LT",
    "克罗地亚语": "hr-HR",
    "罗马尼亚语": "ro-RO",
    "斯洛伐克语": "sk-SK",
    "保加利亚语": "bg-BG",
    "塞尔维亚语": "sr-RS",
    "乌克兰语": "uk-UA",
    "繁体中文": "zh-TW",
    "印地语": "hi-IN",
    "挪威博克马尔语": "nb-NO",
    "波斯语": "fa-IR",
    "罗马语": "rm-CH",
    "斯瓦希里语": "sw-KE",
    "孟加拉语": "bn-BD",
    "波斯尼亚语": "bs-BA",
    "加泰罗尼亚语": "ca-ES",
    "克里奥尔语": "ht-HT",
    "爱尔兰语": "ga-IE",
    "卡纳达语": "kn-IN",
    "哈萨克语": "kk-KZ",
    "马其顿语": "mk-MK",
    "马拉雅拉姆语": "ml-IN",
    "毛利语": "mi-NZ",
    "尼泊尔语": "ne-NP",
    "普什图语": "ps-AF",
    "旁遮普语": "pa-IN",
    "萨摩亚语": "sm-WS",
    "索马里语": "so-SO",
    "塔加洛语": "tl-PH",
    "塔吉克语": "tg-TJ",
    "泰米尔语": "ta-IN",
    "泰卢固语": "te-IN",
    "图库尔语": "tk-TM",
    "乌尔都语": "ur-PK",
    "乌兹别克语": "uz-UZ",
    "威尔士语": "cy-GB",
    "科西嘉语": "co-FR",
    "弗里西语": "fy-NL",
    "加利西亚语": "gl-ES",
}



def text_to_speech(text, language_code):
    """Synthesize ``text`` with Azure Speech and save the audio to a file.

    Args:
        text: The text to speak.
        language_code: A key of the module-level ``dict`` language table (the
            label picked in the UI); it is mapped to a locale such as "zh-CN".

    Returns:
        A ``(message, file_path)`` tuple. ``file_path`` is whatever
        ``save_audio`` returns on success and ``None`` in every other case.
    """
    # Resolve the locale up front so an unknown or missing selection yields a
    # readable message instead of a KeyError.
    locale = dict.get(language_code)
    if locale is None:
        return "Unsupported language: {}".format(language_code), None

    # The subscription key is read from the environment; the region is fixed.
    # NOTE(review): os.getenv returns None when the variable is unset --
    # confirm how SpeechConfig reacts to a missing key.
    subscription_key = os.getenv('subscription_key')
    region = "eastus"

    # Creates an instance of a speech config with specified subscription key and service region.
    speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=region)
    # The synthesis language (not the recognition language) is the setting
    # that selects the voice used for text-to-speech.
    speech_config.speech_synthesis_language = locale

    # Creates a speech synthesizer using the default speaker as audio output.
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

    # Synthesizes the received text to speech and blocks until it finishes.
    result = speech_synthesizer.speak_text_async(text).get()

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        audio_stream = io.BytesIO(result.audio_data)
        file_path = save_audio(audio_stream)
        return "Speech synthesized to speaker for text [{}]".format(text), file_path

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        message = "Speech synthesis canceled: {}".format(cancellation_details.reason)
        # Append the error diagnostics to the message rather than returning
        # before they can be reported.
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            if cancellation_details.error_details:
                message += " Error details: {}".format(cancellation_details.error_details)
            message += " Did you update the subscription info?"
        return message, None

    # Always hand back a 2-tuple so the caller never receives a bare None.
    return "Speech synthesis ended with unexpected reason: {}".format(result.reason), None


def save_audio(audio_stream):
    """Write the remaining bytes of ``audio_stream`` to ``speech.wav``.

    Args:
        audio_stream: A binary file-like object exposing ``read()``.

    Returns:
        The path of the written file, always ``"speech.wav"`` (relative to the
        current working directory; an existing file is overwritten).
    """
    target = "speech.wav"
    with open(target, "wb") as sink:
        payload = audio_stream.read()
        sink.write(payload)
    return target


# Gradio widgets: a multi-line text box and a language dropdown feed
# text_to_speech; its (message, file path) result fills a text box and an
# audio player.
input_text = gr.inputs.Textbox(
    lines=5,
    label="Input Text",
)
output_text = gr.outputs.Textbox(
    label="Output Text",
)
output_audio = gr.outputs.Audio(
    type="filepath",
    label="导出文件",
)
# The dropdown offers every label of the language table, in table order.
language = gr.inputs.Dropdown(
    choices=list(dict),
    label="Language",
)
interface = gr.Interface(
    fn=text_to_speech,
    inputs=[input_text, language],
    outputs=[output_text, output_audio],
    title="微软文字转语音",
)

# Start the web UI as soon as the module is executed.
interface.launch()