# audio-detector/app.py
import gradio as gr
import torch
import torchaudio
from torchaudio.transforms import Resample
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

# Load the Hugging Face feature extractor and model once at startup
feature_extractor = AutoFeatureExtractor.from_pretrained(
"MelodyMachine/Deepfake-audio-detection-V2"
)
model = AutoModelForAudioClassification.from_pretrained(
"MelodyMachine/Deepfake-audio-detection-V2"
)
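model.eval()  # inference only: disables dropout and other training-time behavior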

TARGET_SR = feature_extractor.sampling_rate  # 16 kHz for this model


def detect_deepfake_audio(audio_path: str) -> str:
    # Gradio passes None when no file has been provided
    if audio_path is None:
        return "Please upload an audio file first."
    # Load the audio file; torchaudio returns (channels, samples) and the rate
    waveform, orig_sr = torchaudio.load(audio_path)
    # Mix down to mono if the clip has multiple channels
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    # Resample if not already 16 kHz
    if orig_sr != TARGET_SR:
        resampler = Resample(orig_sr, TARGET_SR)
        waveform = resampler(waveform)
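    # Note: constructing Resample on every call is simple but rebuilds the
    # resampling kernel each time; caching one resampler per input rate at
    # module level would avoid the repeated setup under load.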
    # Prepare model inputs; the extractor expects a 1-D array of samples,
    # so drop the channel dimension before passing the waveform in
    inputs = feature_extractor(
        waveform.squeeze(0).numpy(), sampling_rate=TARGET_SR, return_tensors="pt"
    )
    # Forward pass without gradient tracking
    with torch.no_grad():
        outputs = model(**inputs)
    # Compute probabilities and pick the top class
    probs = torch.softmax(outputs.logits, dim=-1)[0]
    idx = torch.argmax(probs).item()
    label = model.config.id2label[idx]
    confidence = probs[idx].item()
    # Plain text, since the result is shown in a Textbox (Markdown is not rendered there)
    return f"The audio is classified as {label} (confidence {confidence:.2f})"

# Build the Gradio Blocks interface
with gr.Blocks() as demo:
    gr.Markdown("# Audio Deepfake Detection")
    gr.Markdown("Upload an audio clip to check for deepfake content.")
    audio_in = gr.Audio(type="filepath", label="Select Audio File")
    txt_out = gr.Textbox(label="Result")
    gr.Button("Detect").click(
        fn=detect_deepfake_audio, inputs=audio_in, outputs=txt_out
    )

if __name__ == "__main__":
    demo.launch()