Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -35,23 +35,16 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
|
|
35 |
|
36 |
resample_audio = resampler(newsr=24000)
|
37 |
audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
|
38 |
-
# Normalize audio
|
39 |
audio_np = audio_data_resampled.cpu().numpy()
|
40 |
audio_np = audio_np / np.max(np.abs(audio_np))
|
41 |
-
|
42 |
-
# Ensure audio data is in the correct format
|
43 |
audio_np = np.asarray(audio_np, dtype=np.float32)
|
44 |
|
45 |
-
# Create stereo audio by duplicating the mono channel
|
46 |
audio_stereo = np.stack((audio_np, audio_np), axis=-1)
|
|
|
47 |
|
48 |
-
#
|
49 |
-
print("Audio Array Shape:", audio_stereo.shape)
|
50 |
-
print("Audio Array Dtype:", audio_stereo.dtype)
|
51 |
-
|
52 |
-
# Save to a temporary WAV file as stereo
|
53 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
|
54 |
-
# Write the stereo data with a sample rate of 24000 Hz
|
55 |
sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
|
56 |
return tmp_file.name
|
57 |
|
|
|
35 |
|
36 |
resample_audio = resampler(newsr=24000)
|
37 |
audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
|
|
|
38 |
audio_np = audio_data_resampled.cpu().numpy()
|
39 |
audio_np = audio_np / np.max(np.abs(audio_np))
|
|
|
|
|
40 |
audio_np = np.asarray(audio_np, dtype=np.float32)
|
41 |
|
|
|
42 |
audio_stereo = np.stack((audio_np, audio_np), axis=-1)
|
43 |
+
audio_stereo = audio_stereo.reshape(-1, 2)
|
44 |
|
45 |
+
# print("Audio Array Shape:", audio_stereo.shape)
|
46 |
+
# print("Audio Array Dtype:", audio_stereo.dtype)
|
|
|
|
|
|
|
47 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
|
|
|
48 |
sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
|
49 |
return tmp_file.name
|
50 |
|