Spaces: Running on Zero
Update utils/decode.py
Browse files
- utils/decode.py +9 -1
utils/decode.py
CHANGED
@@ -188,6 +188,9 @@ def decode_one_audio_frcrn_se_16k(model, device, inputs, args):
|
|
188 |
# If no segmentation is required, process the entire input
|
189 |
outputs = model.inference(inputs).detach().cpu().numpy() # Inference on full input
|
190 |
|
|
|
|
|
|
|
191 |
return outputs # Return the decoded audio output
|
192 |
|
193 |
def decode_one_audio_mossformergan_se_16k(model, device, inputs, args):
|
@@ -439,7 +442,12 @@ def decode_one_audio_mossformer2_se_48k(model, device, inputs, args):
|
|
439 |
# Reconstruct audio from the masked spectrogram
|
440 |
outputs = istft(masked_spec_complex, args, len(audio))
|
441 |
|
442 |
-
|
|
|
|
|
|
|
|
|
|
|
443 |
|
444 |
def decode_one_audio_AV_MossFormer2_TSE_16K(model, inputs, args):
|
445 |
"""Processes video inputs through the AV mossformer2 model with Target speaker extraction (TSE) for decoding at 16kHz.
|
|
|
188 |
# If no segmentation is required, process the entire input
|
189 |
outputs = model.inference(inputs).detach().cpu().numpy() # Inference on full input
|
190 |
|
191 |
+
#normalize outputs
|
192 |
+
max_abs = max(abs(outputs), 1e-6)
|
193 |
+
outputs = outputs / max_abs
|
194 |
return outputs # Return the decoded audio output
|
195 |
|
196 |
def decode_one_audio_mossformergan_se_16k(model, device, inputs, args):
|
|
|
442 |
# Reconstruct audio from the masked spectrogram
|
443 |
outputs = istft(masked_spec_complex, args, len(audio))
|
444 |
|
445 |
+
outpus = outputs.numpy() / MAX_WAV_VALUE # Return the output normalized to [-1, 1]
|
446 |
+
#normalize outputs
|
447 |
+
max_abs = max(abs(outputs), 1e-6)
|
448 |
+
outputs = outputs / max_abs
|
449 |
+
|
450 |
+
return outputs
|
451 |
|
452 |
def decode_one_audio_AV_MossFormer2_TSE_16K(model, inputs, args):
|
453 |
"""Processes video inputs through the AV mossformer2 model with Target speaker extraction (TSE) for decoding at 16kHz.
|