import gradio as gr
import librosa
import torch
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
# Path to the local directory where the model files are stored within the Space
local_model_path = "./"
# Initialize the feature extractor and model from the local files
extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
model = AutoModelForAudioClassification.from_pretrained(local_model_path)
def predict_voice(audio_file_path):
    """
    Predicts whether a voice is real or spoofed from an audio file.

    Args:
        audio_file_path: Path to the input audio file to be classified.

    Returns:
        A string with the predicted label and confidence level.
    """
    # Load the audio and resample it to the rate the feature extractor expects.
    # librosa is assumed to be available in the Space for decoding and resampling.
    waveform, _ = librosa.load(audio_file_path, sr=extractor.sampling_rate)

    # Convert the waveform to the model's expected input format.
    inputs = extractor(waveform, sampling_rate=extractor.sampling_rate, return_tensors="pt")

    # Generate predictions from the model without tracking gradients.
    with torch.no_grad():
        outputs = model(**inputs)

    # Extract logits and pick the class with the highest score.
    logits = outputs.logits
    predicted_index = int(torch.argmax(logits, dim=-1))

    # Translate the index to a human-readable label.
    label = model.config.id2label[predicted_index]

    # Compute the confidence of the prediction via a softmax over the logits.
    confidence = torch.softmax(logits, dim=-1).max().item() * 100

    return f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
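# Quick local sanity check (hypothetical example; assumes a file named "sample.wav" sits next to app.py):
#   print(predict_voice("sample.wav"))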
# Set up the Gradio interface.
iface = gr.Interface(
    fn=predict_voice,  # Function to call on each uploaded file
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File"),  # Audio input as a file path
    outputs="text",  # Text output with the prediction
    title="Voice Authenticity Detection",
    description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
)
# Run the Gradio interface
iface.launch()