import streamlit as st
import numpy as np
import pandas as pd
import librosa
from io import BytesIO
import matplotlib.pyplot as plt
from src.features.extraction.low_level_features_extractor import LowLevelFeatureExtractor
from src.features.extraction.high_level_features_extractor import HighLevelFeatureExtractor
from src.models.predict import predict
# Set page layout
st.set_page_config(page_title="Audio Deepfake Detection", layout="wide")

# Add a custom style for background and font
# NOTE(review): the stylesheet body is empty here, so this call currently
# injects nothing into the page.
st.markdown("""
""", unsafe_allow_html=True)

# App title. The literal is kept on one line: a single-quoted string cannot
# span several lines.
st.markdown("<h1>Audio Deepfake Detection</h1>", unsafe_allow_html=True)
st.write("This application helps you detect whether an audio file is a deepfake or genuine.")

# File uploader; `uploaded_file` stays None until the user selects a file.
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"])
# Extract features from audio
def extract_features(audio_data, sample_rate):
    """Compute the high-level feature table for a single audio clip.

    The clip is wrapped in a one-row DataFrame, passed through the low-level
    feature generator, and the resulting stream is fed to the high-level
    feature generator.

    Args:
        audio_data: Audio samples for the clip, stored as-is in ``audio_arr``.
        sample_rate: Sampling rate of ``audio_data``, stored in ``srate``.

    Returns:
        A DataFrame built from the items yielded by the high-level generator.
    """
    single_clip = pd.DataFrame({
        'audio_id': [0],
        'audio_arr': [audio_data],
        'srate': [sample_rate],
        # NOTE(review): the constant 0 looks like a placeholder label that the
        # extractor's input schema expects -- confirm against the extractor.
        'real_or_fake': [0],
    })
    low_level = LowLevelFeatureExtractor(
        target_sr=16000,
        include_only=['spectral', 'prosodic', 'voice_quality'],
    )
    high_level = HighLevelFeatureExtractor()
    # Materialise the generator so the rows can be handed to pandas at once.
    rows = [*high_level.high_level_feature_generator(
        low_level.low_level_feature_generator(single_clip)
    )]
    return pd.DataFrame(rows)
# Plot waveform
def plot_waveform(audio_data, sample_rate):
    """Draw the waveform of an audio signal into the current Streamlit container.

    Args:
        audio_data: Amplitude samples to plot; must support ``len()``.
        sample_rate: Samples per second, used to convert sample indices
            into seconds for the x axis.
    """
    n_samples = len(audio_data)
    # Sample i occurs at i / sample_rate seconds. endpoint=False keeps that
    # spacing exact instead of stretching the last sample out to
    # n_samples / sample_rate.
    times = np.linspace(0, n_samples / sample_rate, n_samples, endpoint=False)

    fig, ax = plt.subplots(figsize=(10, 2))  # Wide and short waveform plot
    try:
        ax.plot(times, audio_data)
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("Amplitude")
        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates until it is
        # closed; without this, each script rerun would leak one figure.
        plt.close(fig)
# Process the uploaded file
if uploaded_file is not None:
    # Two columns: audio player and waveform on the left, prediction and
    # confidence on the right.
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Uploaded Audio")
        st.audio(uploaded_file)

        # Show waveform
        st.subheader("Audio Waveform")
        # getvalue() returns the whole buffer regardless of the current read
        # position, so decoding does not depend on whether the file object
        # has already been read from.
        audio_bytes = uploaded_file.getvalue()
        # sr=None asks librosa to keep the file's native sampling rate.
        audio_data, sample_rate = librosa.load(BytesIO(audio_bytes), sr=None)
        plot_waveform(audio_data, sample_rate)

    with col2:
        # Extract features
        features_df = extract_features(audio_data, sample_rate)
        predictions, prediction_probabilities = predict(features_df)

        # Display prediction and confidence score
        st.subheader("Prediction Results")
        prediction = predictions[0]
        is_deepfake = prediction == 1

        # Index 1 is read as the probability of the deepfake class (label 1).
        # NOTE(review): assumes predict() returns per-class probabilities for
        # a binary classifier -- confirm against src.models.predict.
        # float() converts NumPy scalars so st.progress receives a built-in
        # float.
        deepfake_probability = float(prediction_probabilities[0][1])
        # Report the probability of the class that was actually predicted;
        # for a "Genuine" verdict that is the complement of the deepfake
        # probability.
        if is_deepfake:
            confidence_score = deepfake_probability * 100
        else:
            confidence_score = (1.0 - deepfake_probability) * 100

        if is_deepfake:
            st.error("This audio is classified as a Deepfake!")
        else:
            st.success("This audio is classified as Genuine!")

        # Show confidence score using a progress bar (expects 0.0-1.0).
        st.markdown("<h4>Confidence Score</h4>", unsafe_allow_html=True)
        st.progress(confidence_score / 100)
        st.write(f"The model is {confidence_score:.2f}% confident in its prediction.")
# Footer: a separator slot followed by a short description of the app.
# NOTE(review): the separator markup is an empty string, so it renders nothing.
footer_separator = ''
footer_description = (
    "\n"
    "This app uses machine learning models trained on various audio features, such as spectral, prosodic, and voice quality metrics.\n"
    "It analyzes the audio to classify whether it is a genuine recording or a deepfake, providing a confidence score for its prediction.\n"
)
st.markdown(footer_separator, unsafe_allow_html=True)
st.write(footer_description)