File size: 1,806 Bytes
ee91d94
24baf79
cea8753
0a26e54
1364a7f
ee91d94
24baf79
 
 
0c35856
8a834c6
1364a7f
8a834c6
1364a7f
 
8a834c6
 
3b392fa
df3ef47
9ff14b4
1364a7f
 
e8e81bf
 
8a834c6
e8e81bf
df3ef47
e8e81bf
 
 
1364a7f
e8e81bf
 
 
1364a7f
 
df3ef47
24baf79
ee91d94
 
cea8753
e8e81bf
cea8753
0c35856
9ff14b4
ee91d94
 
9ff14b4
1364a7f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
import torch
import librosa
import numpy as np

# Directory the pretrained checkpoint is read from; "./" is relative to the
# process's current working directory.
local_model_path = "./"
# Loaded once at import time; predict_voice() reuses these module-level
# objects on every call instead of reloading them.
extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
model = AutoModelForAudioClassification.from_pretrained(local_model_path)

def preprocess_audio(audio_file_path, target_sample_rate=16000):
    """Read an audio file as a normalized mono waveform.

    Args:
        audio_file_path: Location of the audio file to read.
        target_sample_rate: Sample rate, in Hz, requested from the loader.

    Returns:
        A ``(waveform, sample_rate)`` tuple, where ``sample_rate`` is the
        ``target_sample_rate`` that was requested.
    """
    # The loader also reports a sample rate; it is dropped because the
    # requested rate is what gets returned to the caller.
    samples = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)[0]
    # Scale so the waveform lies within [-1, 1].
    scaled = librosa.util.normalize(samples)
    return scaled, target_sample_rate

def predict_voice(audio_file_path):
    """Classify an audio file and describe the outcome in one sentence.

    Args:
        audio_file_path: Path of the audio file to classify. ``None`` or an
            empty string means no file was supplied.

    Returns:
        A human-readable string: the predicted label with its confidence as
        a percentage, a prompt to upload audio when no path was given, or an
        ``"Error during processing: ..."`` message if any step raised.
    """
    # A missing path (e.g. the form was submitted without an upload) gets a
    # direct answer instead of an opaque exception message from the loader.
    if not audio_file_path:
        return "No audio provided. Please upload an audio file."

    try:
        # Resample to the rate the feature extractor declares so the audio
        # and the extractor agree; fall back to 16 kHz if it exposes none.
        target_rate = getattr(extractor, "sampling_rate", None) or 16000
        waveform, sample_rate = preprocess_audio(
            audio_file_path, target_sample_rate=target_rate
        )
        # Ensure waveform is a float32 array
        waveform = waveform.astype(np.float32)
        inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(**inputs).logits

        # A single file is classified per call, so only the first row is
        # used; taking both the class index and its probability from the
        # same softmax row keeps the label and the confidence consistent.
        probabilities = torch.softmax(logits, dim=-1)[0]
        confidence, predicted_index = probabilities.max(dim=-1)
        label = model.config.id2label[predicted_index.item()]
        percent = confidence.item() * 100

        result = f"The voice is classified as '{label}' with a confidence of {percent:.2f}%."
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the
        # request, so the problem is visible in the output box.
        result = f"Error during processing: {e}"

    return result

# Form widgets: the audio widget is configured with type="filepath", so the
# handler receives a path rather than raw samples; the result is shown as text.
_audio_input = gr.Audio(label="Upload Audio File", type="filepath")
_prediction_box = gr.Textbox(label="Prediction")

# Wire the widgets to the classifier.
iface = gr.Interface(
    title="Voice Authenticity Detection",
    description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
    fn=predict_voice,
    inputs=_audio_input,
    outputs=_prediction_box,
)

# Start the app as soon as this module is executed.
iface.launch()