voice_clone_detection

Runtime error

App Files Community

Kabatubare committed on Mar 13, 2024

Commit

38963c6

verified ·

1 Parent(s): 49ef139

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -22

app.py CHANGED Viewed

@@ -3,44 +3,80 @@ import librosa
 import numpy as np
 import torch
 import logging
-from transformers import Wav2Vec2ForSequenceClassification
 logging.basicConfig(level=logging.INFO)
 model_path = "./"
 model = Wav2Vec2ForSequenceClassification.from_pretrained(model_path)
-def preprocess_audio(audio_path, target_sr=16000):
-    y, sr = librosa.load(audio_path, sr=target_sr)
-    y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
-    return y
-def predict_voice(audio_file_path):
     try:
-        audio_data = preprocess_audio(audio_file_path)
-        inputs = model.processor(audio_data, sampling_rate=16000, return_tensors="pt", padding=True)
         with torch.no_grad():
             outputs = model(**inputs)
         logits = outputs.logits
         predicted_index = logits.argmax(dim=1).item()
-        label = model.config.id2label[predicted_index]
         confidence = torch.softmax(logits, dim=1).max().item() * 100
-        result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
-        logging.info("Prediction successful.")
     except Exception as e:
-        result = f"Error during processing: {e}"
-        logging.error(result)
-    return result
 iface = gr.Interface(
-    fn=predict_voice,
-    inputs=gr.Audio(label="Upload Audio File", type="filepath"),
-    outputs=gr.Textbox(label="Prediction"),
-    title="Voice Authenticity Detection",
-    description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results."
 )
 iface.launch()

 import numpy as np
 import torch
 import logging
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
+from pydub import AudioSegment
+import os
+import tempfile
+import soundfile as sf
+# Setup logging
 logging.basicConfig(level=logging.INFO)
+# Load model and processor
 model_path = "./"
 model = Wav2Vec2ForSequenceClassification.from_pretrained(model_path)
+processor = Wav2Vec2Processor.from_pretrained(model_path)
+def preprocess_audio(audio_file_path, target_sampling_rate=16000):
+    """
+    Preprocess the input audio file to the target sampling rate and format.
+    """
+    # Convert audio to target sampling rate using librosa
+    y, sr = librosa.load(audio_file_path, sr=target_sampling_rate)
+    return y, sr
+def predict_audio_class(audio_file_path):
+    """
+    Predict the class of the input audio file using Wav2Vec 2.0 model.
+    """
     try:
+        # Preprocess audio
+        audio, sr = preprocess_audio(audio_file_path, target_sampling_rate=16000)
+        # Prepare the audio for the model
+        inputs = processor(audio, sampling_rate=sr, return_tensors="pt", padding=True, truncation=True)
+        # Predict
         with torch.no_grad():
             outputs = model(**inputs)
         logits = outputs.logits
         predicted_index = logits.argmax(dim=1).item()
         confidence = torch.softmax(logits, dim=1).max().item() * 100
+        label = model.config.id2label[predicted_index]
+        return f"Predicted class: {label} with confidence: {confidence:.2f}%"
     except Exception as e:
+        logging.error(f"Error during processing: {e}")
+        return "Prediction failed due to an error."
+def save_temp_audio(file):
+    """
+    Saves a temporary audio file, returns the path.
+    """
+    temp_dir = tempfile.gettempdir()
+    temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir, suffix=".wav")
+    temp_file_path = temp_file.name
+    # Convert to WAV for consistency
+    AudioSegment.from_file(file).export(temp_file_path, format="wav")
+    return temp_file_path
+def handle_audio_input(file_info):
+    """
+    Handles the input audio file for prediction.
+    """
+    audio_file_path = save_temp_audio(file_info)
+    prediction = predict_audio_class(audio_file_path)
+    os.unlink(audio_file_path)  # Clean up temp file
+    return prediction
+# Setup Gradio interface
 iface = gr.Interface(
+    fn=handle_audio_input,
+    inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio"),
+    outputs="text",
+    title="Audio Class Prediction",
+    description="Predicts the class of uploaded audio files using a fine-tuned Wav2Vec 2.0 model."
 )
 iface.launch()