# audio-detector/app.py
import gradio as gr
import torch
import torchaudio
from torchaudio.transforms import Resample
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

# Load the Hugging Face feature extractor and model once at startup
feature_extractor = AutoFeatureExtractor.from_pretrained(
"MelodyMachine/Deepfake-audio-detection-V2"
)
model = AutoModelForAudioClassification.from_pretrained(
"MelodyMachine/Deepfake-audio-detection-V2"
)
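model.eval()  # inference only: disables dropout and other training-time behavior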

TARGET_SR = feature_extractor.sampling_rate  # 16 kHz for this model


def detect_deepfake_audio(audio_path: str) -> str:
    # Gradio passes None when no file has been provided
    if audio_path is None:
        return "Please upload an audio file first."
    # Load the audio file; torchaudio returns (channels, samples) and the rate
    waveform, orig_sr = torchaudio.load(audio_path)
    # Mix down to mono if the clip has multiple channels
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    # Resample if not already 16 kHz
    if orig_sr != TARGET_SR:
        resampler = Resample(orig_sr, TARGET_SR)
        waveform = resampler(waveform)
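    # Note: constructing Resample on every call is simple but rebuilds the
    # resampling kernel each time; caching one resampler per input rate at
    # module level would avoid the repeated setup under load.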
    # Prepare model inputs; the extractor expects a 1-D array of samples,
    # so drop the channel dimension before passing the waveform in
    inputs = feature_extractor(
        waveform.squeeze(0).numpy(), sampling_rate=TARGET_SR, return_tensors="pt"
    )
    # Forward pass without gradient tracking
    with torch.no_grad():
        outputs = model(**inputs)
    # Compute probabilities and pick the top class
    probs = torch.softmax(outputs.logits, dim=-1)[0]
    idx = torch.argmax(probs).item()
    label = model.config.id2label[idx]
    confidence = probs[idx].item()
    # Plain text, since the result is shown in a Textbox (Markdown is not rendered there)
    return f"The audio is classified as {label} (confidence {confidence:.2f})"

# Build the Gradio Blocks interface
with gr.Blocks() as demo:
    gr.Markdown("# Audio Deepfake Detection")
    gr.Markdown("Upload an audio clip to check for deepfake content.")
    audio_in = gr.Audio(type="filepath", label="Select Audio File")
    txt_out = gr.Textbox(label="Result")
    gr.Button("Detect").click(
        fn=detect_deepfake_audio, inputs=audio_in, outputs=txt_out
    )

if __name__ == "__main__":
    demo.launch()