Kabatubare committed on
Commit
245b81a
·
verified ·
1 Parent(s): fac0d91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -36,9 +36,11 @@ def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, ho
36
  def predict_voice(audio_file_path):
37
  try:
38
  features_tensor = augment_and_extract_features(audio_file_path)
39
- # Reshape tensor to match model input size, avoiding dimension mismatch errors
40
- if features_tensor.shape[-1] != model.config.num_labels:
41
- features_tensor = torch.nn.functional.interpolate(features_tensor, size=(model.config.num_labels,), mode='linear', align_corners=False)
 
 
42
  with torch.no_grad():
43
  outputs = model(features_tensor)
44
 
@@ -63,4 +65,3 @@ iface = gr.Interface(
63
  )
64
 
65
  iface.launch()
66
-
 
36
  def predict_voice(audio_file_path):
37
  try:
38
  features_tensor = augment_and_extract_features(audio_file_path)
39
+ # Adjust model input size or preprocessing to avoid size mismatch with convolution kernel
40
+ if features_tensor.dim() < 4: # Ensure tensor is 4D (batch, channel, height, width) for CNNs
41
+ features_tensor = features_tensor.unsqueeze(1) # Add a channel dimension if missing
42
+ # Apply adaptive pooling to match model expected input size if necessary
43
+ features_tensor = torch.nn.AdaptiveAvgPool2d((model.config.num_labels, model.config.num_labels))(features_tensor)
44
  with torch.no_grad():
45
  outputs = model(features_tensor)
46
 
 
65
  )
66
 
67
  iface.launch()