import gradio as gr
import torch
import torchaudio
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

# Load the HF feature extractor and model
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "MelodyMachine/Deepfake-audio-detection-V2"
)
model = AutoModelForAudioClassification.from_pretrained(
    "MelodyMachine/Deepfake-audio-detection-V2"
)
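
# The checkpoint's class names can be inspected via the model config; the exact
# labels depend on the checkpoint (e.g. something like {0: "fake", 1: "real"}):
#   print(model.config.id2label)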

def detect_deepfake_audio(audio_path: str) -> str:
    # Gradio passes None if "Detect" is clicked before a file is uploaded
    if audio_path is None:
        return "Please upload an audio file first."

    # Load the audio file (waveform shape: [channels, samples])
    waveform, sample_rate = torchaudio.load(audio_path)

    # Mix down to mono if the clip has multiple channels
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # Resample to the extractor's expected rate (typically 16 kHz for speech
    # models); the feature extractor raises an error on mismatched rates
    target_rate = feature_extractor.sampling_rate
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)

    # The extractor expects a 1-D array of samples, not a [1, samples] tensor
    inputs = feature_extractor(
        waveform.squeeze(0).numpy(), sampling_rate=target_rate, return_tensors="pt"
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities and pick the top-scoring class
    probs = torch.softmax(outputs.logits, dim=-1)[0]
    idx = torch.argmax(probs).item()
    label = model.config.id2label[idx]
    confidence = probs[idx].item()

    return f"The audio is classified as {label} (confidence: {confidence:.2f})"

# Build the Gradio Blocks interface
with gr.Blocks() as demo:
    gr.Markdown("# Audio Deepfake Detection")
    gr.Markdown("Upload an audio clip to check for deepfake content.")
    audio_in = gr.Audio(type="filepath", label="Select Audio File")
    txt_out = gr.Textbox(label="Result")
    gr.Button("Detect").click(
        fn=detect_deepfake_audio, inputs=audio_in, outputs=txt_out
    )

if __name__ == "__main__":
    demo.launch()
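    # demo.launch(share=True) would instead serve the app behind a temporary
    # public Gradio link, handy when running on a remote machine.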