import gradio as gr
import torch
import torchaudio
from transformers import AutoProcessor, AutoModelForAudioClassification

# Load the Hugging Face processor and model for audio deepfake detection.
processor = AutoProcessor.from_pretrained("MelodyMachine/Deepfake-audio-detection-V2")
model = AutoModelForAudioClassification.from_pretrained("MelodyMachine/Deepfake-audio-detection-V2")
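# NOTE (assumption): this checkpoint appears to be a wav2vec2-style audio
# classifier, so its feature extractor reports the sampling rate it expects;
# the detection function below resamples input audio to match.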

def detect_deepfake_audio(audio_path: str) -> str:
    # Load audio (supports WAV, MP3, FLAC, etc.)
    waveform, sample_rate = torchaudio.load(audio_path)
    # Convert to mono if necessary
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    # Resample to the rate the feature extractor expects (16 kHz fallback)
    target_rate = getattr(processor, "sampling_rate", 16000)
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)
        sample_rate = target_rate
    # Preprocess for the model; the feature extractor expects a 1-D array
    inputs = processor(waveform.squeeze().numpy(), sampling_rate=sample_rate, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Convert logits to probabilities and report the top class
    probs = torch.softmax(outputs.logits, dim=-1)[0]
    idx = torch.argmax(probs).item()
    label = model.config.id2label[idx]
    confidence = probs[idx].item()
    return f"The audio is classified as **{label}** with confidence **{confidence:.2f}**"
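
# A minimal sanity check without the UI (sketch only; "sample.wav" is a
# hypothetical local file, not part of the repo). Calling the function
# directly returns the same Markdown string the app displays:
#   print(detect_deepfake_audio("sample.wav"))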

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Audio Deepfake Detection App")
    gr.Markdown("### Upload or record an audio clip to detect deepfake content.")
    # Gradio 4.x uses `sources=`; allow both upload and microphone so the
    # widget matches the prompt above (3.x builds used source="upload")
    audio_in = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
    txt_out = gr.Textbox(label="Result")
    gr.Button("Detect").click(fn=detect_deepfake_audio, inputs=audio_in, outputs=txt_out)

if __name__ == "__main__":
    demo.launch()
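
# Optional launch variant for local testing: Gradio can expose a temporary
# public URL via demo.launch(share=True); the bare launch() above is all a
# Hugging Face Space needs.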