Kabatubare committed on
Commit
af80923
·
verified ·
1 Parent(s): 14ac9f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -23
app.py CHANGED
@@ -2,43 +2,35 @@ import numpy as np
2
  import torch
3
  import librosa
4
  import gradio as gr
5
- from transformers import AutoModelForAudioClassification
6
  import logging
7
 
8
  logging.basicConfig(level=logging.INFO)
9
 
10
  model_path = "./"
11
  model = AutoModelForAudioClassification.from_pretrained(model_path)
 
12
 
13
  def preprocess_audio(audio_path, sr=16000):
14
- # Load the audio file. Note: Adjusting the sample rate (sr) to match the model's expected input
15
- audio, sr = librosa.load(audio_path, sr=sr)
16
- # Trim silence from the beginning and the end
17
  audio, _ = librosa.effects.trim(audio)
18
- return audio, sr
19
 
20
  def extract_features(audio, sr=16000):
21
- # Compute the Mel spectrogram
22
- S = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128, hop_length=512, n_fft=2048)
23
- # Convert to dB scale
24
- S_DB = librosa.power_to_db(S, ref=np.max)
25
- # Normally, further feature extraction steps would be here. For this model, we will directly use S_DB.
26
- return S_DB
27
 
28
  def predict_voice(audio_file_path):
29
  try:
30
- audio, sr = preprocess_audio(audio_file_path)
31
- S_DB = extract_features(audio, sr)
32
-
33
- # Convert S_DB to tensor and add required batch dimension
34
- S_DB_tensor = torch.tensor(S_DB).unsqueeze(0)
35
 
36
  with torch.no_grad():
37
- outputs = model(S_DB_tensor)
38
  logits = outputs.logits
39
- predicted_index = logits.argmax()
40
- label = model.config.id2label[predicted_index.item()]
41
- confidence = torch.softmax(logits, dim=1).max().item() * 100
42
 
43
  result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
44
  logging.info("Prediction successful.")
@@ -54,6 +46,4 @@ iface = gr.Interface(
54
  outputs=gr.Text(label="Prediction"),
55
  title="Voice Authenticity Detection",
56
  description="This system uses advanced audio processing to detect whether a voice is real or AI-generated. Upload an audio file to see the results."
57
- )
58
-
59
- iface.launch()
 
2
  import torch
3
  import librosa
4
  import gradio as gr
5
+ from transformers import AutoModelForAudioClassification, Wav2Vec2Processor
6
  import logging
7
 
8
  logging.basicConfig(level=logging.INFO)
9
 
10
  model_path = "./"
11
  model = AutoModelForAudioClassification.from_pretrained(model_path)
12
+ processor = Wav2Vec2Processor.from_pretrained(model_path)
13
 
14
  def preprocess_audio(audio_path, sr=16000):
15
+ audio, _ = librosa.load(audio_path, sr=sr)
 
 
16
  audio, _ = librosa.effects.trim(audio)
17
+ return audio
18
 
19
  def extract_features(audio, sr=16000):
20
+ inputs = processor(audio, sampling_rate=sr, return_tensors="pt", padding=True)
21
+ return inputs
 
 
 
 
22
 
23
  def predict_voice(audio_file_path):
24
  try:
25
+ audio = preprocess_audio(audio_file_path)
26
+ features = extract_features(audio)
 
 
 
27
 
28
  with torch.no_grad():
29
+ outputs = model(**features)
30
  logits = outputs.logits
31
+ predicted_index = logits.argmax(dim=-1)
32
+ label = processor.decode(predicted_index)
33
+ confidence = torch.softmax(logits, dim=-1).max().item() * 100
34
 
35
  result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
36
  logging.info("Prediction successful.")
 
46
  outputs=gr.Text(label="Prediction"),
47
  title="Voice Authenticity Detection",
48
  description="This system uses advanced audio processing to detect whether a voice is real or AI-generated. Upload an audio file to see the results."
49
+ ).launch()