import gradio as gr
import torch
import torchaudio
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

# Load the Hugging Face feature extractor and model
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "MelodyMachine/Deepfake-audio-detection-V2"
)
model = AutoModelForAudioClassification.from_pretrained(
    "MelodyMachine/Deepfake-audio-detection-V2"
)


def detect_deepfake_audio(audio_path: str) -> str:
    # Load the audio file from disk
    waveform, sample_rate = torchaudio.load(audio_path)

    # Mix down to mono if the clip has multiple channels
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # Resample to the rate the feature extractor expects (typically 16 kHz);
    # a mismatched rate would make the extractor raise an error
    target_rate = feature_extractor.sampling_rate
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)

    # Prepare model inputs; the extractor expects a 1-D array of raw samples
    inputs = feature_extractor(
        waveform.squeeze(0).numpy(),
        sampling_rate=target_rate,
        return_tensors="pt",
    )

    # Run inference without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities and pick the most likely class
    probs = torch.softmax(outputs.logits, dim=-1)[0]
    idx = torch.argmax(probs).item()
    label = model.config.id2label[idx]
    confidence = probs[idx].item()

    return f"The audio is classified as **{label}** with confidence **{confidence:.2f}**"


# Build the Gradio Blocks interface
with gr.Blocks() as demo:
    gr.Markdown("# Audio Deepfake Detection")
    gr.Markdown("Upload an audio clip to check for deepfake content.")
    audio_in = gr.Audio(type="filepath", label="Select Audio File")
    txt_out = gr.Textbox(label="Result")
    gr.Button("Detect").click(
        fn=detect_deepfake_audio,
        inputs=audio_in,
        outputs=txt_out,
    )

if __name__ == "__main__":
    demo.launch()