fffiloni committed on
Commit
52ad00f
·
verified ·
1 Parent(s): 66255ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -11
app.py CHANGED
@@ -133,33 +133,44 @@ def infer(audio_input_path):
133
  def load_chosen_audio(audio_path):
134
  return audio_path
135
 
136
- def overlay_audio(original_mp3: str, translated_wav: str, volume_reduction_db: int = 10) -> str:
 
 
 
 
 
137
  """
138
  Overlays translated audio on top of the original, reduces the original volume,
139
  and ensures the final audio lasts as long as the longer of the two tracks.
140
-
141
  :param original_mp3: Path to the original MP3 file.
142
  :param translated_wav: Path to the translated WAV file.
143
  :param volume_reduction_db: Volume reduction in dB (default is -10 dB).
 
144
  :return: Path to the temporary output WAV file.
145
  """
146
  # Load original MP3 and convert to WAV
147
  original = AudioSegment.from_mp3(original_mp3).set_frame_rate(16000).set_channels(1)
148
 
149
- # Lower the volume (default is -10 dB)
150
  original = original - volume_reduction_db
151
 
152
  # Load the translated WAV
153
  translated = AudioSegment.from_wav(translated_wav).set_frame_rate(16000).set_channels(1)
154
-
 
 
 
 
 
155
  # Determine the final length (longer of the two)
156
  final_length = max(len(original), len(translated))
157
-
158
  # Extend the shorter track with silence to match the longer track
159
  if len(original) < final_length:
160
- original = original + AudioSegment.silent(duration=final_length - len(original))
161
  if len(translated) < final_length:
162
- translated = translated + AudioSegment.silent(duration=final_length - len(translated))
163
 
164
  # Overlay the translated speech over the original
165
  combined = original.overlay(translated)
@@ -169,11 +180,10 @@ def overlay_audio(original_mp3: str, translated_wav: str, volume_reduction_db: i
169
  combined.export(temp_file.name, format="wav")
170
 
171
  print(f"Final audio saved at: {temp_file.name}")
172
- return temp_file.name # Return the temporary file path
173
 
174
- def process_final_combination(audio_in, chosen_translated, video_input):
175
  audio_in = process_audio(audio_in)
176
- temp_output_path = overlay_audio(audio_in, chosen_translated)
177
 
178
  if video_input:
179
  return gr.update(value=temp_output_path, visible=True), gr.update(visible=True)
@@ -251,6 +261,9 @@ with gr.Blocks(css=css) as demo:
251
  scale=2
252
  )
253
  choose_this_btn = gr.Button("Use this one", scale=1, visible=False)
 
 
 
254
  combined_output = gr.Audio("Combined Output", type="filepath", visible=False)
255
  apply_to_video_btn = gr.Button("Apply to video", visible=False)
256
  final_video_out = gr.Video(label="Video + Translated Audio", visible=False)
@@ -281,7 +294,7 @@ with gr.Blocks(css=css) as demo:
281
 
282
  choose_this_btn.click(
283
  fn = process_final_combination,
284
- inputs = [audio_input, dropdown_wav_selector, video_input],
285
  outputs = [combined_output, apply_to_video_btn]
286
  )
287
 
 
133
  def load_chosen_audio(audio_path):
134
  return audio_path
135
 
136
+ def overlay_audio(
137
+ original_mp3: str,
138
+ translated_wav: str,
139
+ volume_reduction_db: int = 10,
140
+ cut_start: float = 0.0
141
+ ) -> str:
142
  """
143
  Overlays translated audio on top of the original, reduces the original volume,
144
  and ensures the final audio lasts as long as the longer of the two tracks.
145
+
146
  :param original_mp3: Path to the original MP3 file.
147
  :param translated_wav: Path to the translated WAV file.
148
  :param volume_reduction_db: Volume reduction in dB (default is -10 dB).
149
+ :param cut_start: Number of seconds to trim from the start of the translated audio (default: 0.0).
150
  :return: Path to the temporary output WAV file.
151
  """
152
  # Load original MP3 and convert to WAV
153
  original = AudioSegment.from_mp3(original_mp3).set_frame_rate(16000).set_channels(1)
154
 
155
+ # Lower the volume
156
  original = original - volume_reduction_db
157
 
158
  # Load the translated WAV
159
  translated = AudioSegment.from_wav(translated_wav).set_frame_rate(16000).set_channels(1)
160
+
161
+ # Trim the start of the translated audio if needed
162
+ if cut_start > 0:
163
+ cut_ms = int(cut_start * 1000) # Convert seconds to milliseconds
164
+ translated = translated[cut_ms:]
165
+
166
  # Determine the final length (longer of the two)
167
  final_length = max(len(original), len(translated))
168
+
169
  # Extend the shorter track with silence to match the longer track
170
  if len(original) < final_length:
171
+ original += AudioSegment.silent(duration=final_length - len(original))
172
  if len(translated) < final_length:
173
+ translated += AudioSegment.silent(duration=final_length - len(translated))
174
 
175
  # Overlay the translated speech over the original
176
  combined = original.overlay(translated)
 
180
  combined.export(temp_file.name, format="wav")
181
 
182
  print(f"Final audio saved at: {temp_file.name}")
 
183
 
184
+ def process_final_combination(audio_in, chosen_translated, volume, cut_start, video_input):
185
  audio_in = process_audio(audio_in)
186
+ temp_output_path = overlay_audio(audio_in, chosen_translated, volume, cut_start)
187
 
188
  if video_input:
189
  return gr.update(value=temp_output_path, visible=True), gr.update(visible=True)
 
261
  scale=2
262
  )
263
  choose_this_btn = gr.Button("Use this one", scale=1, visible=False)
264
+ with gr.Row():
265
+ volume_reduction = gr.Slider(label="Volume reduction", minimum=0, maximum=12, step=1, value=12)
266
+ cut_start = gr.Slider(label="Cut start", minimum=0.0, maximum=4.0, step=0.1, value=2.0)
267
  combined_output = gr.Audio("Combined Output", type="filepath", visible=False)
268
  apply_to_video_btn = gr.Button("Apply to video", visible=False)
269
  final_video_out = gr.Video(label="Video + Translated Audio", visible=False)
 
294
 
295
  choose_this_btn.click(
296
  fn = process_final_combination,
297
+ inputs = [audio_input, dropdown_wav_selector, volume_reduction, cut_start, video_input],
298
  outputs = [combined_output, apply_to_video_btn]
299
  )
300