Kabatubare committed on
Commit
eb22fab
·
verified ·
1 Parent(s): 245b81a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -36,11 +36,12 @@ def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, ho
36
  def predict_voice(audio_file_path):
37
  try:
38
  features_tensor = augment_and_extract_features(audio_file_path)
39
- # Adjust model input size or preprocessing to avoid size mismatch with convolution kernel
40
- if features_tensor.dim() < 4: # Ensure tensor is 4D (batch, channel, height, width) for CNNs
41
- features_tensor = features_tensor.unsqueeze(1) # Add a channel dimension if missing
42
- # Apply adaptive pooling to match model expected input size if necessary
43
- features_tensor = torch.nn.AdaptiveAvgPool2d((model.config.num_labels, model.config.num_labels))(features_tensor)
 
44
  with torch.no_grad():
45
  outputs = model(features_tensor)
46
 
 
36
  def predict_voice(audio_file_path):
37
  try:
38
  features_tensor = augment_and_extract_features(audio_file_path)
39
+ # Correct the tensor shape to match expected input format for convolutional layers
40
+ if features_tensor.dim() > 4: # Check if tensor has extra dimensions
41
+ features_tensor = features_tensor.squeeze() # Remove unnecessary dimensions
42
+ if features_tensor.shape[-1] < model.config.num_labels: # Ensure sufficient length for model input
43
+ padding_size = model.config.num_labels - features_tensor.shape[-1]
44
+ features_tensor = torch.nn.functional.pad(features_tensor, (0, padding_size), "constant", 0)
45
  with torch.no_grad():
46
  outputs = model(features_tensor)
47