Tonic committed on
Commit
084c0d1
1 Parent(s): c4b4e50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -47,14 +47,17 @@ def generate_segment_audio(text, lang, speaker_url, pipe):
47
  audio_np = audio_data_resampled.cpu().numpy()
48
  return audio_np
49
 
50
- # this function pads each segment to the length of the longest segment which is not optimal
51
  def concatenate_audio_segments(segments):
52
- mono_segments = [seg[:, 0] if seg.ndim > 1 else seg for seg in segments]
53
- max_len = max(seg.shape[0] for seg in mono_segments)
54
- padded_segments = [np.pad(seg, (0, max_len - seg.shape[0]), 'constant') for seg in mono_segments]
55
- concatenated_audio = np.concatenate(padded_segments, axis=0)
 
 
 
56
  concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
57
- return np.asarray(concatenated_audio, dtype=np.float32)
58
 
59
  @spaces.GPU
60
  def whisper_speech_demo(multilingual_text, speaker_audio):
 
47
  audio_np = audio_data_resampled.cpu().numpy()
48
  return audio_np
49
 
50
+ # Function to concatenate audio segments in stereo
51
  def concatenate_audio_segments(segments):
52
+ total_length = sum(seg.shape[0] for seg in segments)
53
+ concatenated_audio = np.zeros((total_length, 2), dtype=np.float32)
54
+ current_index = 0
55
+ for seg in segments:
56
+ end_index = current_index + seg.shape[0]
57
+ concatenated_audio[current_index:end_index, :] = seg
58
+ current_index = end_index
59
  concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
60
+ return concatenated_audio
61
 
62
  @spaces.GPU
63
  def whisper_speech_demo(multilingual_text, speaker_audio):