Kabatubare committed on
Commit
0a26e54
·
verified ·
1 Parent(s): 3b392fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
3
  import numpy as np
4
  import torch
5
  from torch.nn.functional import softmax
 
6
  import soundfile as sf
7
 
8
  # Path to the local directory where the model files are stored within the Space
@@ -17,14 +18,18 @@ def predict_voice(audio_file_path):
17
  Predicts whether a voice is real or spoofed from an audio file.
18
 
19
  Args:
20
- audio_file_path: The file path of the uploaded audio file to be classified.
21
 
22
  Returns:
23
  A string with the prediction and confidence level.
24
  """
25
 
26
- # Load the audio file. Adjust the loading mechanism based on your audio file format.
27
- waveform, sample_rate = sf.read(audio_file_path)
 
 
 
 
28
 
29
  # Convert the input audio file to model's expected format.
30
  inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
@@ -47,14 +52,17 @@ def predict_voice(audio_file_path):
47
  result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
48
  return result
49
 
 
50
  iface = gr.Interface(
51
  fn=predict_voice,
52
- inputs=gr.Audio(type="filepath", label="Upload Audio File"), # Ensure filepath is passed
53
  outputs=gr.Textbox(label="Prediction"),
54
  title="Voice Authenticity Detection",
55
  description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
56
  theme="huggingface"
57
  )
58
 
59
- # Run the Gradio interface
 
 
60
  iface.launch()
 
3
  import numpy as np
4
  import torch
5
  from torch.nn.functional import softmax
6
+ import librosa
7
  import soundfile as sf
8
 
9
  # Path to the local directory where the model files are stored within the Space
 
18
  Predicts whether a voice is real or spoofed from an audio file.
19
 
20
  Args:
21
+ audio_file_path: The path to the input audio file to be classified.
22
 
23
  Returns:
24
  A string with the prediction and confidence level.
25
  """
26
 
27
+ # Load the audio file. librosa automatically resamples to the target sample rate if needed.
28
+ waveform, sample_rate = librosa.load(audio_file_path, sr=16000) # Force resampling to 16000 Hz
29
+
30
+ # Ensure waveform is mono
31
+ if len(waveform.shape) > 1:
32
+ waveform = np.mean(waveform, axis=0)
33
 
34
  # Convert the input audio file to model's expected format.
35
  inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
 
52
  result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
53
  return result
54
 
55
+ # Setting up the Gradio interface
56
  iface = gr.Interface(
57
  fn=predict_voice,
58
+ inputs=gr.Audio(source="upload", type="filepath", label="Upload Audio File"),
59
  outputs=gr.Textbox(label="Prediction"),
60
  title="Voice Authenticity Detection",
61
  description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
62
  theme="huggingface"
63
  )
64
 
65
+ # Run the Gradio interface with share=True for creating a public link
66
+ iface.launch(share=True)
67
+ # Run the Gradio interface
68
  iface.launch()