Tonic committed on
Commit
50e659a
1 Parent(s): 67dbfa2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -20
app.py CHANGED
@@ -50,28 +50,28 @@ def generate_segment_audio(text, lang, speaker_url, pipe):
50
 
51
  # Function to append and concatenate audio segments with padding
52
  def concatenate_audio_segments(segments):
53
- # Determine the length of the longest segment
54
- max_length = max(seg.shape[0] for seg in segments)
55
- print("Max length of segments:", max_length) # Debug statement
56
- # Pad each segment to the length of the longest segment and stack them
57
- padded_segments = []
58
- for seg in segments:
59
- # Check if the segment is stereo; if not, convert it to stereo
60
- if seg.ndim == 1 or seg.shape[1] == 1:
61
- stereo_segment = np.stack((seg, seg), axis=-1)
62
- else:
63
- stereo_segment = seg
64
 
65
  # Pad the segment to the max length
66
- padding_length = max_length - stereo_segment.shape[0]
67
- padded_segment = np.pad(stereo_segment, ((0, padding_length), (0, 0)), 'constant')
68
- print("Padded segment shape:", padded_segment.shape) # Debug statement
69
- padded_segments.append(padded_segment)
70
 
71
- concatenated_audio = np.vstack(padded_segments)
72
 
73
  print("Concatenated audio shape:", concatenated_audio.shape) # Debug statement
74
- concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
75
  return concatenated_audio
76
 
77
  # The rest of the code in app.py remains the same
@@ -93,11 +93,11 @@ def whisper_speech_demo(multilingual_text, speaker_audio):
93
  audio_segments.append(audio_np)
94
  concatenated_audio = concatenate_audio_segments(audio_segments)
95
  print("Final concatenated audio shape:", concatenated_audio.shape) # Debug statement
96
- audio_stereo = np.stack((concatenated_audio, concatenated_audio), axis=-1)
97
- audio_stereo = audio_stereo.reshape(-1, 2)
98
 
99
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
100
- sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
101
  return tmp_file.name
102
 
103
  with gr.Blocks() as demo:
 
50
 
51
  # Function to append and concatenate audio segments with padding
52
  def concatenate_audio_segments(segments):
53
+ # # Determine the length of the longest segment
54
+ # max_length = max(seg.shape[0] for seg in segments)
55
+ # print("Max length of segments:", max_length) # Debug statement
56
+ # # Pad each segment to the length of the longest segment and stack them
57
+ # padded_segments = []
58
+ # for seg in segments:
59
+ # # Check if the segment is stereo; if not, convert it to stereo
60
+ # if seg.ndim == 1 or seg.shape[1] == 1:
61
+ # stereo_segment = np.stack((seg, seg), axis=-1)
62
+ # else:
63
+ # stereo_segment = seg
64
 
65
  # Pad the segment to the max length
66
+ # padding_length = max_length - stereo_segment.shape[0]
67
+ # padded_segment = np.pad(stereo_segment, ((0, padding_length), (0, 0)), 'constant')
68
+ # print("Padded segment shape:", padded_segment.shape) # Debug statement
69
+ # padded_segments.append(padded_segment)
70
 
71
+ concatenated_audio = np.concatenate(segments , axis=1)
72
 
73
  print("Concatenated audio shape:", concatenated_audio.shape) # Debug statement
74
+ # concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
75
  return concatenated_audio
76
 
77
  # The rest of the code in app.py remains the same
 
93
  audio_segments.append(audio_np)
94
  concatenated_audio = concatenate_audio_segments(audio_segments)
95
  print("Final concatenated audio shape:", concatenated_audio.shape) # Debug statement
96
+ # audio_stereo = np.stack((concatenated_audio, concatenated_audio), axis=-1)
97
+ # audio_stereo = audio_stereo.reshape(-1, 2)
98
 
99
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
100
+ sf.write(tmp_file.name, concatenated_audio, 24000, format='WAV', subtype='PCM_16')
101
  return tmp_file.name
102
 
103
  with gr.Blocks() as demo: