Kabatubare committed on
Commit
eb22fab
·
verified ·
1 Parent(s): 245b81a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -36,11 +36,12 @@ def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, ho
36
  def predict_voice(audio_file_path):
37
  try:
38
  features_tensor = augment_and_extract_features(audio_file_path)
39
- # Adjust model input size or preprocessing to avoid size mismatch with convolution kernel
40
- if features_tensor.dim() < 4: # Ensure tensor is 4D (batch, channel, height, width) for CNNs
41
- features_tensor = features_tensor.unsqueeze(1) # Add a channel dimension if missing
42
- # Apply adaptive pooling to match model expected input size if necessary
43
- features_tensor = torch.nn.AdaptiveAvgPool2d((model.config.num_labels, model.config.num_labels))(features_tensor)
 
44
  with torch.no_grad():
45
  outputs = model(features_tensor)
46
 
 
36
  def predict_voice(audio_file_path):
37
  try:
38
  features_tensor = augment_and_extract_features(audio_file_path)
39
+ # Correct the tensor shape to match expected input format for convolutional layers
40
+ if features_tensor.dim() > 4: # Check if tensor has extra dimensions
41
+ features_tensor = features_tensor.squeeze() # Remove unnecessary dimensions
42
+ if features_tensor.shape[-1] < model.config.num_labels: # Ensure sufficient length for model input
43
+ padding_size = model.config.num_labels - features_tensor.shape[-1]
44
+ features_tensor = torch.nn.functional.pad(features_tensor, (0, padding_size), "constant", 0)
45
  with torch.no_grad():
46
  outputs = model(features_tensor)
47