Kabatubare committed (verified)
Commit cea8753 · Parent: 7859ecd

Update app.py

Files changed (1): app.py (+15 -9)
app.py CHANGED
@@ -1,6 +1,8 @@
 import gradio as gr
 from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
 import numpy as np
+import torch
+from torch.nn.functional import softmax
 
 # Path to the local directory where the model files are stored within the Space
 local_model_path = "./"
@@ -20,20 +22,24 @@ def predict_voice(audio_file):
     A string with the prediction and confidence level.
     """
     # Convert the input audio file to model's expected format.
-    inputs = extractor(audio_file, return_tensors="pt")
+    # The following code assumes your audio file is a numpy array.
+    # You may need to modify this depending on how the audio file is being read.
+    waveform = np.array(audio_file)
+    inputs = extractor(waveform, return_tensors="pt", sampling_rate=extractor.sampling_rate)
 
     # Generate predictions from the model.
-    outputs = model(**inputs)
+    with torch.no_grad():  # Ensure no gradients are calculated
+        outputs = model(**inputs)
 
     # Extract logits and compute the class with the highest score.
     logits = outputs.logits
-    predicted_index = np.argmax(logits.detach().numpy())
+    predicted_index = logits.argmax()
 
     # Translate index to label
-    label = model.config.id2label[predicted_index]
+    label = model.config.id2label[predicted_index.item()]
 
-    # Calculate the confidence of the prediction.
-    confidence = np.max(np.softmax(logits.detach().numpy(), axis=1)) * 100
+    # Calculate the confidence of the prediction using softmax.
+    confidence = softmax(logits, dim=1).max().item() * 100
 
     # Prepare the output string.
     result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
@@ -41,9 +47,9 @@ def predict_voice(audio_file):
 
 # Setting up the Gradio interface
 iface = gr.Interface(
-    fn=predict_voice,  # Function to call
-    inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio File"),  # Audio input
-    outputs="text",  # Text output
+    fn=predict_voice,
+    inputs=gr.Audio(source="upload", type="file", label="Upload Audio File"),
+    outputs=gr.Textbox(label="Prediction"),
     title="Voice Authenticity Detection",
     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
     theme="huggingface"
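For a quick sanity check of the updated inference path outside the Space, a minimal sketch along these lines should work. It assumes the feature extractor and model load from local_model_path ("./") with from_pretrained and that the extractor exposes a sampling_rate attribute (as the diff itself relies on); the one-second noise waveform is purely illustrative, not part of the commit.

# Illustrative check of the new preprocessing and softmax-confidence path.
# Assumes the Space's model files live in "./" (local_model_path) and can be
# loaded with from_pretrained; the random waveform stands in for a real upload.
import numpy as np
import torch
from torch.nn.functional import softmax
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

extractor = AutoFeatureExtractor.from_pretrained("./")
model = AutoModelForAudioClassification.from_pretrained("./")
model.eval()

# One second of noise at the extractor's expected sampling rate.
waveform = np.random.randn(extractor.sampling_rate).astype(np.float32)
inputs = extractor(waveform, return_tensors="pt", sampling_rate=extractor.sampling_rate)

with torch.no_grad():
    logits = model(**inputs).logits

label = model.config.id2label[logits.argmax().item()]
confidence = softmax(logits, dim=1).max().item() * 100
print(f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%.")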