fffiloni committed on
Commit
05091a2
·
verified ·
1 Parent(s): b8ba919

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -206,6 +206,10 @@ def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.
206
  post_masked_spec = denormalize(masked_spec).to(device, dtype)
207
  denorm_masked_spec = denormalize_spectrogram(post_masked_spec)
208
  denorm_masked_spec_audio = vocoder.inference(denorm_masked_spec)
 
 
 
 
209
 
210
  denorm_spec = denormalize_spectrogram(output_spec)
211
  denorm_spec_audio = vocoder.inference(denorm_spec)
@@ -225,7 +229,9 @@ def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.
225
  print("Output spectrogram min/max:", output_spec.min().item(), output_spec.max().item())
226
 
227
  # Save as WAV
228
- sf.write("output.wav", denorm_spec_audio, 16000)
 
 
229
 
230
  # Save input spectrogram image
231
  input_spec_image_path = "input_spectrogram.png"
@@ -235,7 +241,7 @@ def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.
235
  output_spec_image_path = "output_spectrogram.png"
236
  color_output_spec_image.save(output_spec_image_path)
237
 
238
- return "output.wav", input_spec_image_path, color_output_spec_image
239
 
240
  def load_input_spectrogram(audio_path):
241
  # Loading
@@ -380,6 +386,11 @@ with gr.Blocks(css=css) as demo:
380
  with gr.Column():
381
  input_spectrogram_inp = gr.Image(label="Input Spectrogram")
382
  output_spectrogram_inp = gr.Image(label="Output Spectrogram")
 
 
 
 
 
383
 
384
  gr.Examples(
385
  examples = [
@@ -415,7 +426,7 @@ with gr.Blocks(css=css) as demo:
415
  submit_btn_inp.click(
416
  fn = infer_inp,
417
  inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
418
- outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp]
419
  )
420
 
421
  demo.queue().launch(show_api=False, show_error=True)
 
206
  post_masked_spec = denormalize(masked_spec).to(device, dtype)
207
  denorm_masked_spec = denormalize_spectrogram(post_masked_spec)
208
  denorm_masked_spec_audio = vocoder.inference(denorm_masked_spec)
209
+
210
+ # Rescale generated spectrogram to match original range
211
+ output_spec = (output_spec - output_spec.min()) / (output_spec.max() - output_spec.min()) # Normalize to [0,1]
212
+ output_spec = output_spec * (norm_spec.max() - norm_spec.min()) + norm_spec.min() # Rescale to match input range
213
 
214
  denorm_spec = denormalize_spectrogram(output_spec)
215
  denorm_spec_audio = vocoder.inference(denorm_spec)
 
229
  print("Output spectrogram min/max:", output_spec.min().item(), output_spec.max().item())
230
 
231
  # Save as WAV
232
+ sf.write("raw_output.wav", raw_chunk_audio, 16000)
233
+ sf.write("masked_raw_output.wav", denorm_masked_spec_audio, 16000)
234
+ sf.write("generated_output.wav", denorm_spec_audio, 16000)
235
 
236
  # Save input spectrogram image
237
  input_spec_image_path = "input_spectrogram.png"
 
241
  output_spec_image_path = "output_spectrogram.png"
242
  color_output_spec_image.save(output_spec_image_path)
243
 
244
+ return "raw_output.wav", input_spec_image_path, color_output_spec_image, "raw_output.wav", "masked_raw_output.wav"
245
 
246
  def load_input_spectrogram(audio_path):
247
  # Loading
 
386
  with gr.Column():
387
  input_spectrogram_inp = gr.Image(label="Input Spectrogram")
388
  output_spectrogram_inp = gr.Image(label="Output Spectrogram")
389
+
390
+ with gr.Accordion("Raw Processed audio", open=False):
391
+ with gr.Column():
392
+ raw_out_audio = gr.Audio(label="RAW Audio")
393
+ raw_masked_out_audio = gr.Audio(label="RAW Masked Audio")
394
 
395
  gr.Examples(
396
  examples = [
 
426
  submit_btn_inp.click(
427
  fn = infer_inp,
428
  inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
429
+ outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp, raw_out_audio, raw_masked_out_audio]
430
  )
431
 
432
  demo.queue().launch(show_api=False, show_error=True)