File size: 1,706 Bytes
dfabd2f
30a5efb
0a26e54
30a5efb
af80923
30a5efb
f0dd070
 
ee91d94
53b1abc
 
af80923
0c35856
14ac9f5
af80923
30c595f
af80923
50facbf
14ac9f5
af80923
 
411539a
50facbf
411539a
af80923
 
dfabd2f
53b1abc
af80923
50facbf
af80923
 
 
50facbf
637d0ca
50facbf
e8e81bf
637d0ca
 
53b1abc
637d0ca
ee91d94
15eca51
637d0ca
30c595f
50facbf
637d0ca
805e4a6
af80923
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import numpy as np
import torch
import librosa
import gradio as gr
from transformers import AutoModelForAudioClassification, Wav2Vec2Processor
import logging

# Configure the root logger so INFO-level messages (and above) are emitted.
logging.basicConfig(level=logging.INFO)

# Location of the checkpoint files; "./" is relative to the working directory
# of the running process.
model_path = "./"
# Loaded once at import time and shared by the functions below: the audio
# classification model and the processor that prepares its inputs.
model = AutoModelForAudioClassification.from_pretrained(model_path)
processor = Wav2Vec2Processor.from_pretrained(model_path)

def preprocess_audio(audio_path, sr=16000):
    """Load an audio file and trim it with ``librosa.effects.trim``.

    Args:
        audio_path: Path of the audio file to read.
        sr: Sample rate passed to ``librosa.load``.

    Returns:
        The trimmed signal returned by ``librosa.effects.trim`` (called with
        its default settings).
    """
    # librosa.load returns (signal, sample_rate); only the signal is needed.
    waveform = librosa.load(audio_path, sr=sr)[0]
    # librosa.effects.trim returns (trimmed_signal, interval); keep the signal.
    trimmed = librosa.effects.trim(waveform)[0]
    return trimmed

def extract_features(audio, sr=16000):
    """Run the module-level ``processor`` on an audio signal.

    Args:
        audio: Audio signal to convert into model inputs.
        sr: Sample rate of ``audio``, forwarded as ``sampling_rate``.

    Returns:
        The processor's output, requested as PyTorch tensors
        (``return_tensors="pt"``) with padding enabled.
    """
    return processor(
        audio,
        sampling_rate=sr,
        return_tensors="pt",
        padding=True,
    )

def predict_voice(audio_file_path):
    """Classify an audio file and describe the outcome in one sentence.

    Args:
        audio_file_path: Path to the audio file to classify. ``None`` (for
            example, a request submitted without a file) is reported as an
            error instead of being passed to the audio loader.

    Returns:
        A human-readable string: either the predicted label with its
        confidence as a percentage, or a message starting with
        "Error during processing:" when anything went wrong. Failures are
        logged and reported in the returned string rather than raised.
    """
    if audio_file_path is None:
        result = "Error during processing: no audio file was provided."
        logging.error(result)
        return result

    try:
        audio = preprocess_audio(audio_file_path)
        features = extract_features(audio)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(**features).logits

        # A single clip is submitted, so the first row of the output is used
        # (assumes logits are laid out as (batch, num_labels)).
        probabilities = torch.softmax(logits, dim=-1)[0]
        predicted_index = int(probabilities.argmax())

        # Class names live in the model config. processor.decode() is the
        # processor's tokenizer text decode and does not map a class index
        # to a label.
        label = model.config.id2label[predicted_index]
        confidence = probabilities[predicted_index].item() * 100

        result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
        logging.info("Prediction successful.")
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the
        # handler, and keep the traceback in the log for debugging.
        result = f"Error during processing: {e}"
        logging.exception(result)

    return result

# Build the web UI and keep a reference to the Interface object itself.
# Chaining .launch() directly onto the constructor would bind `iface` to
# launch()'s return value instead of the Interface.
iface = gr.Interface(
    fn=predict_voice,
    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
    outputs=gr.Text(label="Prediction"),
    title="Voice Authenticity Detection",
    description="This system uses advanced audio processing to detect whether a voice is real or AI-generated. Upload an audio file to see the results.",
)

# Start serving the app as soon as the module is executed.
iface.launch()