import gradio as gr
import torch
import torchaudio
from transformers import AutoProcessor, AutoModelForAudioClassification

# Load the Hugging Face processor and model for audio deepfake detection.
MODEL_ID = "MelodyMachine/Deepfake-audio-detection-V2"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForAudioClassification.from_pretrained(MODEL_ID)
model.eval()


def detect_deepfake_audio(audio_path: str) -> str:
    # Load audio (supports WAV, MP3, FLAC, etc.).
    waveform, sample_rate = torchaudio.load(audio_path)

    # Convert multi-channel audio to mono by averaging the channels.
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # Resample to the rate the feature extractor expects (16 kHz for
    # wav2vec2-style models); passing a mismatched rate raises an error.
    target_rate = getattr(processor, "sampling_rate", 16000)
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)

    # Preprocess for the model; the processor expects a 1-D array, so drop
    # the channel dimension before passing the waveform in.
    inputs = processor(
        waveform.squeeze(0).numpy(),
        sampling_rate=target_rate,
        return_tensors="pt",
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # Get the prediction: softmax over the logits, then take the top class.
    probs = torch.softmax(outputs.logits, dim=-1)[0]
    idx = torch.argmax(probs).item()
    label = model.config.id2label[idx]
    confidence = probs[idx].item()
    return f"The audio is classified as **{label}** with confidence **{confidence:.2f}**"


# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# Audio Deepfake Detection App")
    gr.Markdown("### Upload or record an audio clip to detect deepfake content.")
    # Gradio 4.x uses `sources` (a list) rather than the older `source` kwarg;
    # including "microphone" enables the recording path promised above.
    audio_in = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload Audio")
    txt_out = gr.Textbox(label="Result")
    gr.Button("Detect").click(fn=detect_deepfake_audio, inputs=audio_in, outputs=txt_out)

if __name__ == "__main__":
    demo.launch()
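
# A minimal sketch of exercising the classifier outside the Gradio UI, e.g. in
# a smoke test. "sample.wav" is a hypothetical local file, not shipped with
# this app; substitute any clip torchaudio can decode:
#
#     result = detect_deepfake_audio("sample.wav")
#     print(result)  # e.g. "The audio is classified as **fake** with confidence **0.97**"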