Spaces: Running on T4
Update app.py
Browse files
app.py
CHANGED
@@ -52,30 +52,30 @@ with gr.Blocks() as demo:
|
|
52 |
gr.Markdown(title)
|
53 |
|
54 |
with gr.Tabs():
|
55 |
-
with gr.TabItem("Standard TTS"):
|
56 |
with gr.Row():
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
output_audio_standard = gr.Audio(label="🌬️💬📝WhisperSpeech")
|
62 |
-
|
63 |
-
|
64 |
whisper_speech_demo,
|
65 |
-
inputs=[
|
66 |
outputs=output_audio_standard
|
67 |
)
|
68 |
-
|
69 |
-
with gr.TabItem("Mixed Language TTS"):
|
70 |
with gr.Row():
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
output_audio_mixed = gr.Audio(label="🌬️💬📝WhisperSpeech
|
75 |
-
|
76 |
-
|
77 |
whisper_speech_demo,
|
78 |
-
inputs=[
|
79 |
outputs=output_audio_mixed
|
80 |
)
|
81 |
|
|
|
52 |
gr.Markdown(title)
|
53 |
|
54 |
with gr.Tabs():
    # Two tabs share the same backend handler, whisper_speech_demo.
    #
    # Gradio event listeners expect every entry of `inputs` to be a component;
    # a literal None in that list breaks event registration. Each click handler
    # below therefore lists only the components its tab actually owns and uses
    # a small lambda to fill the handler's unused arguments with None.
    #
    # NOTE(review): assumes whisper_speech_demo takes five positional arguments
    # in the order (text, lang, speaker_audio, mix_lang, mix_text), as implied
    # by the slot order of the previous `inputs` lists -- confirm against its
    # definition.
    # NOTE(review): nesting below is reconstructed (the source view had lost
    # its indentation) -- confirm the Row/TabItem layout is the intended one.

    with gr.TabItem("🌬️💬📝Standard TTS"):
        with gr.Row():
            text_input_standard = gr.Textbox(label="Enter text")
            lang_input_standard = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
            speaker_input_standard = gr.Audio(label="Upload or Record Speaker Audio (optional)", sources=["upload", "microphone"], type="filepath")
            generate_button_standard = gr.Button("Generate Speech")
            output_audio_standard = gr.Audio(label="🌬️💬📝WhisperSpeech")

        # Single-language synthesis: the mixed-language arguments are unused.
        generate_button_standard.click(
            lambda text, lang, speaker_audio: whisper_speech_demo(
                text, lang, speaker_audio, None, None
            ),
            inputs=[text_input_standard, lang_input_standard, speaker_input_standard],
            outputs=output_audio_standard,
        )

    with gr.TabItem("🌬️💬📝Mixed Language TTS"):
        with gr.Row():
            mix_text_input_mixed = gr.Textbox(label="Enter mixed language text", placeholder="e.g., Hello, Cześć")
            mix_lang_input_mixed = gr.CheckboxGroup(choices=list(LANGUAGES.keys()), label="Select Languages")
            generate_button_mixed = gr.Button("Generate Mixed Speech")
            output_audio_mixed = gr.Audio(label="Mixed🌬️💬📝WhisperSpeech")

        # Mixed-language synthesis: text, language and speaker audio are unused.
        # Component order (languages, then text) matches the previous inputs list.
        generate_button_mixed.click(
            lambda mix_lang, mix_text: whisper_speech_demo(
                None, None, None, mix_lang, mix_text
            ),
            inputs=[mix_lang_input_mixed, mix_text_input_mixed],
            outputs=output_audio_mixed,
        )
|
81 |
|