Spaces:
Runtime error
Runtime error
File size: 1,806 Bytes
ee91d94 24baf79 cea8753 0a26e54 1364a7f ee91d94 24baf79 0c35856 8a834c6 1364a7f 8a834c6 1364a7f 8a834c6 3b392fa df3ef47 9ff14b4 1364a7f e8e81bf 8a834c6 e8e81bf df3ef47 e8e81bf 1364a7f e8e81bf 1364a7f df3ef47 24baf79 ee91d94 cea8753 e8e81bf cea8753 0c35856 9ff14b4 ee91d94 9ff14b4 1364a7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
import torch
import librosa
import numpy as np
# Directory the pretrained checkpoint is read from; "./" is the current working directory.
local_model_path = "./"
# Loaded once when the module is imported; predict_voice reuses both objects on every call.
extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
model = AutoModelForAudioClassification.from_pretrained(local_model_path)
def preprocess_audio(audio_file_path, target_sample_rate=16000):
    """Load an audio file as a normalized mono waveform.

    Args:
        audio_file_path: Path of the audio file to read.
        target_sample_rate: Sample rate requested from ``librosa.load``
            (defaults to 16000).

    Returns:
        A ``(waveform, target_sample_rate)`` tuple, where ``waveform`` is the
        output of ``librosa.util.normalize`` applied to the loaded samples.
    """
    # librosa.load returns (samples, rate); the rate is dropped because the
    # caller-supplied target rate is what gets returned.
    samples = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)[0]
    # Scale the samples into the [-1, 1] range before handing them back.
    return librosa.util.normalize(samples), target_sample_rate
def predict_voice(audio_file_path):
    """Classify an audio file and describe the prediction as a sentence.

    Args:
        audio_file_path: Path of the audio file to classify. May be ``None``
            or empty when no file was supplied.

    Returns:
        A human-readable string. On success it names the predicted label and
        its confidence as a percentage; on any failure it starts with
        ``"Error during processing: "`` followed by the reason. This function
        does not raise; every ``Exception`` is reported through the returned
        string.
    """
    # Guard first so a missing upload yields a clear message instead of an
    # opaque error from the audio loader.
    if not audio_file_path:
        return "Error during processing: no audio file was provided."

    try:
        waveform, sample_rate = preprocess_audio(audio_file_path)
        # Ensure waveform is a float32 array
        waveform = waveform.astype(np.float32)
        inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(**inputs).logits
        # A single waveform is passed in, so score the first (only) row and
        # take both the label and its confidence from the same distribution.
        probabilities = torch.softmax(logits, dim=-1)[0]
        predicted_index = int(probabilities.argmax())
        label = model.config.id2label[predicted_index]
        confidence = probabilities[predicted_index].item() * 100
        result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
    except Exception as e:
        # UI boundary: report the failure to the user rather than crashing.
        result = f"Error during processing: {e}"
    return result
# Build the UI components up front so the Interface call reads as plain wiring.
_audio_input = gr.Audio(label="Upload Audio File", type="filepath")
_prediction_output = gr.Textbox(label="Prediction")

# Wire the audio input through predict_voice into the text output.
iface = gr.Interface(
    title="Voice Authenticity Detection",
    description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
    fn=predict_voice,
    inputs=_audio_input,
    outputs=_prediction_output,
)

# Start serving the interface.
iface.launch()
|