Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -81,25 +81,18 @@ def whisper_speech_demo(multilingual_text, speaker_audio):
|
|
81 |
segments = parse_multilingual_text(multilingual_text)
|
82 |
if not segments:
|
83 |
return None, "No valid language segments found. Please use the format: <lang> text"
|
84 |
-
|
85 |
pipe = Pipeline()
|
86 |
speaker_url = speaker_audio if speaker_audio is not None else None
|
87 |
audio_segments = []
|
88 |
-
|
89 |
for lang, text in segments:
|
90 |
text_str = text if isinstance(text, str) else str(text)
|
91 |
audio_np = generate_segment_audio(text_str, lang, speaker_url, pipe)
|
92 |
print("Audio segment shape:", audio_np.shape) # Debug statement
|
93 |
audio_segments.append(audio_np)
|
94 |
-
|
95 |
concatenated_audio = concatenate_audio_segments(audio_segments)
|
96 |
print("Final concatenated audio shape:", concatenated_audio.shape) # Debug statement
|
97 |
-
|
98 |
-
# Normalize the concatenated audio
|
99 |
concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
|
100 |
-
|
101 |
-
# Return the concatenated audio as a NumPy array
|
102 |
-
return concatenated_audio
|
103 |
|
104 |
with gr.Blocks() as demo:
|
105 |
gr.Markdown(title)
|
|
|
81 |
segments = parse_multilingual_text(multilingual_text)
|
82 |
if not segments:
|
83 |
return None, "No valid language segments found. Please use the format: <lang> text"
|
|
|
84 |
pipe = Pipeline()
|
85 |
speaker_url = speaker_audio if speaker_audio is not None else None
|
86 |
audio_segments = []
|
|
|
87 |
for lang, text in segments:
|
88 |
text_str = text if isinstance(text, str) else str(text)
|
89 |
audio_np = generate_segment_audio(text_str, lang, speaker_url, pipe)
|
90 |
print("Audio segment shape:", audio_np.shape) # Debug statement
|
91 |
audio_segments.append(audio_np)
|
|
|
92 |
concatenated_audio = concatenate_audio_segments(audio_segments)
|
93 |
print("Final concatenated audio shape:", concatenated_audio.shape) # Debug statement
|
|
|
|
|
94 |
concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
|
95 |
+
return concatenated_audio[0]
|
|
|
|
|
96 |
|
97 |
with gr.Blocks() as demo:
|
98 |
gr.Markdown(title)
|