Kabatubare committed on
Commit
30c595f
·
verified ·
1 Parent(s): 8d34e4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -12
app.py CHANGED
@@ -10,20 +10,43 @@ logging.basicConfig(level=logging.INFO)
10
  model_path = "./"
11
  model = AutoModelForAudioClassification.from_pretrained(model_path)
12
 
13
- def preprocess_audio(audio_file_path, sr=16000):
14
- waveform, _ = librosa.load(audio_file_path, sr=sr)
15
- waveform = librosa.effects.trim(waveform)[0] # Trim silence
16
- return waveform
 
 
17
 
18
- def extract_features(waveform, sr=16000, n_mels=128, n_fft=2048, hop_length=512):
19
- S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
20
- S_DB = librosa.power_to_db(S, ref=np.max)
21
- return torch.tensor(S_DB).float().unsqueeze(0) # Add batch dimension
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def predict_voice(audio_file_path):
24
  try:
25
- waveform = preprocess_audio(audio_file_path)
26
- features = extract_features(waveform)
27
 
28
  with torch.no_grad():
29
  outputs = model(features)
@@ -42,10 +65,10 @@ def predict_voice(audio_file_path):
42
 
43
  iface = gr.Interface(
44
  fn=predict_voice,
45
- inputs=gr.Audio(label="Upload Audio File", type="filepath"), # Corrected 'type' parameter
46
  outputs=gr.Text(label="Prediction"),
47
  title="Voice Authenticity Detection",
48
- description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results."
49
  )
50
 
51
  iface.launch()
 
10
  model_path = "./"
11
  model = AutoModelForAudioClassification.from_pretrained(model_path)
12
 
13
+ def preprocess_audio(audio_path, sr=22050):
14
+ # Load audio file
15
+ audio, sr = librosa.load(audio_path, sr=sr)
16
+ # Trim silence
17
+ audio, _ = librosa.effects.trim(audio)
18
+ return audio, sr
19
 
20
+ def extract_features(audio, sr):
21
+ # Get Mel-spectrogram
22
+ S = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
23
+ log_S = librosa.power_to_db(S, ref=np.max)
24
+
25
+ # Harmonic-Percussive source separation
26
+ y_harmonic, y_percussive = librosa.effects.hpss(audio)
27
+
28
+ # Tempo, beat frames
29
+ tempo, beat_frames = librosa.beat.beat_track(y=audio, sr=sr)
30
+
31
+ # Chroma feature
32
+ chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
33
+
34
+ # Spectral contrast
35
+ contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
36
+
37
+ # Tonnetz
38
+ tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(audio), sr=sr)
39
+
40
+ # Concatenate all features
41
+ features = np.vstack([log_S, chroma, contrast, tonnetz])
42
+ features = torch.tensor(features).float().unsqueeze(0) # Add batch dimension
43
+
44
+ return features
45
 
46
  def predict_voice(audio_file_path):
47
  try:
48
+ audio, sr = preprocess_audio(audio_file_path)
49
+ features = extract_features(audio, sr)
50
 
51
  with torch.no_grad():
52
  outputs = model(features)
 
65
 
66
  iface = gr.Interface(
67
  fn=predict_voice,
68
+ inputs=gr.Audio(label="Upload Audio File", type="filepath"),
69
  outputs=gr.Text(label="Prediction"),
70
  title="Voice Authenticity Detection",
71
+ description="This system uses advanced audio processing to detect whether a voice is real or AI-generated. Upload an audio file to see the results."
72
  )
73
 
74
  iface.launch()