Spaces:

goooofy
/

zerovox-demo

Running

Guenter Bartsch committed on Oct 21, 2024

Commit

41889ec

1 Parent(s): f099617

update layout, default speaker

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ from zerovox.g2p.g2p import DEFAULT_G2P_MODEL_NAME_DE, DEFAULT_G2P_MODEL_NAME_EN
 SAMPLE_RATE=24000 # FIXME
 if "text" not in st.session_state:
     st.session_state.text = "Welcome to the world of speech synthesis!"
@@ -86,7 +88,7 @@ def do_synth():
 st.markdown("# ZeroVOX TTS Demo\n\nZeroVOX is a zero-shot realtime TTS system, fully offline, free and open source.\n\nFor more information, check out\n[https://github.com/gooofy/zerovox](https://github.com/gooofy/zerovox)\n")
-col1, col2 = st.columns(2)
 with col1:
     lang = st.selectbox("Language",
@@ -94,19 +96,23 @@ with col1:
                         on_change=update_text_input,
                         key='lang')
-    text = st.text_input("Text to synthesize", value=st.session_state.text)
-    st.button("Synthesize!", type="primary", on_click=do_synth)
-with col2:
     meldec = st.selectbox("MEL decoder",
                         ["meldec-libritts-multi-band-melgan-v2", "meldec-libritts-hifigan-v1"],
                         #on_change=update_text_input,
                         key='meldec')
-    speakerref = st.selectbox("Voice sample", ZeroVoxTTS.available_speakerrefs())
-    st.audio(ZeroVoxTTS.get_speakerref(speakerref, SAMPLE_RATE), sample_rate=SAMPLE_RATE)
 status = st.status(st.session_state.message, state="complete")

 SAMPLE_RATE=24000 # FIXME
+DEFAULT_SPEAKER = 'en_speaker_00061.wav'
 if "text" not in st.session_state:
     st.session_state.text = "Welcome to the world of speech synthesis!"
 st.markdown("# ZeroVOX TTS Demo\n\nZeroVOX is a zero-shot realtime TTS system, fully offline, free and open source.\n\nFor more information, check out\n[https://github.com/gooofy/zerovox](https://github.com/gooofy/zerovox)\n")
+col1, col2, col3 = st.columns(3)
 with col1:
     lang = st.selectbox("Language",
                         on_change=update_text_input,
                         key='lang')
+with col2:
+    speakers = [s for s in ZeroVoxTTS.available_speakerrefs()]
+    speakerref = st.selectbox("Voice sample", speakers, index=speakers.index(DEFAULT_SPEAKER))
+    st.audio(ZeroVoxTTS.get_speakerref(speakerref, SAMPLE_RATE), sample_rate=SAMPLE_RATE)
+with col3:
     meldec = st.selectbox("MEL decoder",
                         ["meldec-libritts-multi-band-melgan-v2", "meldec-libritts-hifigan-v1"],
                         #on_change=update_text_input,
                         key='meldec')
+text = st.text_input("Text to synthesize", value=st.session_state.text)
+st.button("Synthesize!", type="primary", on_click=do_synth)
 status = st.status(st.session_state.message, state="complete")