Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
|
3 |
-
import numpy as np
|
4 |
import torch
|
5 |
from torch.nn.functional import softmax
|
6 |
import librosa
|
@@ -24,16 +23,27 @@ def safe_path_join(base_path, path):
|
|
24 |
Returns:
|
25 |
The safely joined path if it's a subpath of the base_path, otherwise None.
|
26 |
"""
|
27 |
-
# Normalize and absolute both paths
|
28 |
base_path = os.path.abspath(os.path.normpath(base_path))
|
29 |
target_path = os.path.abspath(os.path.normpath(os.path.join(base_path, path)))
|
30 |
-
|
31 |
-
# Ensure the target path is within the base_path directory
|
32 |
if os.path.commonpath([base_path]) == os.path.commonpath([base_path, target_path]):
|
33 |
return target_path
|
34 |
else:
|
35 |
return None
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
def predict_voice(audio_file_path):
|
38 |
"""
|
39 |
Predicts whether a voice is real or spoofed from an audio file.
|
@@ -44,19 +54,17 @@ def predict_voice(audio_file_path):
|
|
44 |
Returns:
|
45 |
A string with the prediction and confidence level.
|
46 |
"""
|
47 |
-
# Safety check and path normalization
|
48 |
expected_base_path = "/expected/path/for/safety"
|
49 |
safe_audio_file_path = safe_path_join(expected_base_path, audio_file_path)
|
50 |
|
51 |
if not safe_audio_file_path:
|
52 |
return "Error: Invalid file path."
|
53 |
-
|
54 |
try:
|
55 |
-
|
56 |
-
waveform, sample_rate = librosa.load(safe_audio_file_path, sr=16000, mono=True)
|
57 |
inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
|
58 |
|
59 |
-
with torch.no_grad():
|
60 |
outputs = model(**inputs)
|
61 |
|
62 |
logits = outputs.logits
|
@@ -70,7 +78,6 @@ def predict_voice(audio_file_path):
|
|
70 |
|
71 |
return result
|
72 |
|
73 |
-
# Gradio interface setup with enhancements for scalability and performance
|
74 |
iface = gr.Interface(
|
75 |
fn=predict_voice,
|
76 |
inputs=gr.Audio(label="Upload Audio File", type="filepath"),
|
@@ -78,7 +85,7 @@ iface = gr.Interface(
|
|
78 |
title="Voice Authenticity Detection",
|
79 |
description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
|
80 |
theme="huggingface",
|
81 |
-
enable_queue=True
|
82 |
)
|
83 |
|
84 |
iface.launch(share=True)
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
|
|
|
3 |
import torch
|
4 |
from torch.nn.functional import softmax
|
5 |
import librosa
|
|
|
23 |
Returns:
|
24 |
The safely joined path if it's a subpath of the base_path, otherwise None.
|
25 |
"""
|
|
|
26 |
base_path = os.path.abspath(os.path.normpath(base_path))
|
27 |
target_path = os.path.abspath(os.path.normpath(os.path.join(base_path, path)))
|
|
|
|
|
28 |
if os.path.commonpath([base_path]) == os.path.commonpath([base_path, target_path]):
|
29 |
return target_path
|
30 |
else:
|
31 |
return None
|
32 |
|
33 |
+
def preprocess_audio(audio_file_path, target_sample_rate=16000):
    """
    Load an audio file as a mono waveform at the requested sample rate.

    Args:
        audio_file_path: Path to the audio file.
        target_sample_rate: Sample rate passed to librosa.load as `sr`.
            Passing None asks librosa to keep the file's native rate.

    Returns:
        A (waveform, sample_rate) tuple, where sample_rate is the rate
        librosa reports for the returned waveform.
    """
    # Return the rate librosa reports instead of echoing the argument back,
    # so the value stays meaningful when target_sample_rate is None.
    waveform, sample_rate = librosa.load(
        audio_file_path, sr=target_sample_rate, mono=True
    )
    return waveform, sample_rate
|
46 |
+
|
47 |
def predict_voice(audio_file_path):
|
48 |
"""
|
49 |
Predicts whether a voice is real or spoofed from an audio file.
|
|
|
54 |
Returns:
|
55 |
A string with the prediction and confidence level.
|
56 |
"""
|
|
|
57 |
expected_base_path = "/expected/path/for/safety"
|
58 |
safe_audio_file_path = safe_path_join(expected_base_path, audio_file_path)
|
59 |
|
60 |
if not safe_audio_file_path:
|
61 |
return "Error: Invalid file path."
|
62 |
+
|
63 |
try:
|
64 |
+
waveform, sample_rate = preprocess_audio(safe_audio_file_path)
|
|
|
65 |
inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
|
66 |
|
67 |
+
with torch.no_grad():
|
68 |
outputs = model(**inputs)
|
69 |
|
70 |
logits = outputs.logits
|
|
|
78 |
|
79 |
return result
|
80 |
|
|
|
81 |
iface = gr.Interface(
|
82 |
fn=predict_voice,
|
83 |
inputs=gr.Audio(label="Upload Audio File", type="filepath"),
|
|
|
85 |
title="Voice Authenticity Detection",
|
86 |
description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
|
87 |
theme="huggingface",
|
88 |
+
enable_queue=True
|
89 |
)
|
90 |
|
91 |
iface.launch(share=True)
|