Kabatubare committed on
Commit
8a834c6
·
verified ·
1 Parent(s): df3ef47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -11
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
3
- import numpy as np
4
  import torch
5
  from torch.nn.functional import softmax
6
  import librosa
@@ -24,16 +23,27 @@ def safe_path_join(base_path, path):
24
  Returns:
25
  The safely joined path if it's a subpath of the base_path, otherwise None.
26
  """
27
- # Normalize and absolute both paths
28
  base_path = os.path.abspath(os.path.normpath(base_path))
29
  target_path = os.path.abspath(os.path.normpath(os.path.join(base_path, path)))
30
-
31
- # Ensure the target path is within the base_path directory
32
  if os.path.commonpath([base_path]) == os.path.commonpath([base_path, target_path]):
33
  return target_path
34
  else:
35
  return None
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def predict_voice(audio_file_path):
38
  """
39
  Predicts whether a voice is real or spoofed from an audio file.
@@ -44,19 +54,17 @@ def predict_voice(audio_file_path):
44
  Returns:
45
  A string with the prediction and confidence level.
46
  """
47
- # Safety check and path normalization
48
  expected_base_path = "/expected/path/for/safety"
49
  safe_audio_file_path = safe_path_join(expected_base_path, audio_file_path)
50
 
51
  if not safe_audio_file_path:
52
  return "Error: Invalid file path."
53
-
54
  try:
55
- # Load and preprocess the audio file
56
- waveform, sample_rate = librosa.load(safe_audio_file_path, sr=16000, mono=True)
57
  inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
58
 
59
- with torch.no_grad(): # No gradients needed
60
  outputs = model(**inputs)
61
 
62
  logits = outputs.logits
@@ -70,7 +78,6 @@ def predict_voice(audio_file_path):
70
 
71
  return result
72
 
73
- # Gradio interface setup with enhancements for scalability and performance
74
  iface = gr.Interface(
75
  fn=predict_voice,
76
  inputs=gr.Audio(label="Upload Audio File", type="filepath"),
@@ -78,7 +85,7 @@ iface = gr.Interface(
78
  title="Voice Authenticity Detection",
79
  description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
80
  theme="huggingface",
81
- enable_queue=True # Enable queuing to handle high traffic efficiently
82
  )
83
 
84
  iface.launch(share=True)
 
1
  import gradio as gr
2
  from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
 
3
  import torch
4
  from torch.nn.functional import softmax
5
  import librosa
 
23
  Returns:
24
  The safely joined path if it's a subpath of the base_path, otherwise None.
25
  """
 
26
  base_path = os.path.abspath(os.path.normpath(base_path))
27
  target_path = os.path.abspath(os.path.normpath(os.path.join(base_path, path)))
 
 
28
  if os.path.commonpath([base_path]) == os.path.commonpath([base_path, target_path]):
29
  return target_path
30
  else:
31
  return None
32
 
33
+ def preprocess_audio(audio_file_path, target_sample_rate=16000):
34
+ """
35
+ Preprocesses the audio file for compatibility with the model's expectations.
36
+
37
+ Args:
38
+ audio_file_path: Path to the audio file.
39
+ target_sample_rate: Desired sample rate compatible with the model.
40
+
41
+ Returns:
42
+ Processed waveform and sample rate.
43
+ """
44
+ waveform, _ = librosa.load(audio_file_path, sr=target_sample_rate, mono=True)
45
+ return waveform, target_sample_rate
46
+
47
  def predict_voice(audio_file_path):
48
  """
49
  Predicts whether a voice is real or spoofed from an audio file.
 
54
  Returns:
55
  A string with the prediction and confidence level.
56
  """
 
57
  expected_base_path = "/expected/path/for/safety"
58
  safe_audio_file_path = safe_path_join(expected_base_path, audio_file_path)
59
 
60
  if not safe_audio_file_path:
61
  return "Error: Invalid file path."
62
+
63
  try:
64
+ waveform, sample_rate = preprocess_audio(safe_audio_file_path)
 
65
  inputs = extractor(waveform, return_tensors="pt", sampling_rate=sample_rate)
66
 
67
+ with torch.no_grad():
68
  outputs = model(**inputs)
69
 
70
  logits = outputs.logits
 
78
 
79
  return result
80
 
 
81
  iface = gr.Interface(
82
  fn=predict_voice,
83
  inputs=gr.Audio(label="Upload Audio File", type="filepath"),
 
85
  title="Voice Authenticity Detection",
86
  description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
87
  theme="huggingface",
88
+ enable_queue=True
89
  )
90
 
91
  iface.launch(share=True)