Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -50,28 +50,28 @@ def generate_segment_audio(text, lang, speaker_url, pipe):
|
|
50 |
|
51 |
# Function to append and concatenate audio segments with padding
|
52 |
def concatenate_audio_segments(segments):
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
|
65 |
# Pad the segment to the max length
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
|
71 |
-
concatenated_audio = np.
|
72 |
|
73 |
print("Concatenated audio shape:", concatenated_audio.shape) # Debug statement
|
74 |
-
concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
|
75 |
return concatenated_audio
|
76 |
|
77 |
# The rest of the code in app.py remains the same
|
@@ -93,11 +93,11 @@ def whisper_speech_demo(multilingual_text, speaker_audio):
|
|
93 |
audio_segments.append(audio_np)
|
94 |
concatenated_audio = concatenate_audio_segments(audio_segments)
|
95 |
print("Final concatenated audio shape:", concatenated_audio.shape) # Debug statement
|
96 |
-
|
97 |
-
|
98 |
|
99 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
|
100 |
-
sf.write(tmp_file.name,
|
101 |
return tmp_file.name
|
102 |
|
103 |
with gr.Blocks() as demo:
|
|
|
50 |
|
51 |
# Function to append and concatenate audio segments with padding
|
52 |
def concatenate_audio_segments(segments):
|
53 |
+
# # Determine the length of the longest segment
|
54 |
+
# max_length = max(seg.shape[0] for seg in segments)
|
55 |
+
# print("Max length of segments:", max_length) # Debug statement
|
56 |
+
# # Pad each segment to the length of the longest segment and stack them
|
57 |
+
# padded_segments = []
|
58 |
+
# for seg in segments:
|
59 |
+
# # Check if the segment is stereo; if not, convert it to stereo
|
60 |
+
# if seg.ndim == 1 or seg.shape[1] == 1:
|
61 |
+
# stereo_segment = np.stack((seg, seg), axis=-1)
|
62 |
+
# else:
|
63 |
+
# stereo_segment = seg
|
64 |
|
65 |
# Pad the segment to the max length
|
66 |
+
# padding_length = max_length - stereo_segment.shape[0]
|
67 |
+
# padded_segment = np.pad(stereo_segment, ((0, padding_length), (0, 0)), 'constant')
|
68 |
+
# print("Padded segment shape:", padded_segment.shape) # Debug statement
|
69 |
+
# padded_segments.append(padded_segment)
|
70 |
|
71 |
+
concatenated_audio = np.concatenate(segments , axis=1)
|
72 |
|
73 |
print("Concatenated audio shape:", concatenated_audio.shape) # Debug statement
|
74 |
+
# concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
|
75 |
return concatenated_audio
|
76 |
|
77 |
# The rest of the code in app.py remains the same
|
|
|
93 |
audio_segments.append(audio_np)
|
94 |
concatenated_audio = concatenate_audio_segments(audio_segments)
|
95 |
print("Final concatenated audio shape:", concatenated_audio.shape) # Debug statement
|
96 |
+
# audio_stereo = np.stack((concatenated_audio, concatenated_audio), axis=-1)
|
97 |
+
# audio_stereo = audio_stereo.reshape(-1, 2)
|
98 |
|
99 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
|
100 |
+
sf.write(tmp_file.name, concatenated_audio, 24000, format='WAV', subtype='PCM_16')
|
101 |
return tmp_file.name
|
102 |
|
103 |
with gr.Blocks() as demo:
|