File size: 4,009 Bytes
416dc49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import streamlit as st
import numpy as np
import pandas as pd
import librosa
from io import BytesIO
import matplotlib.pyplot as plt

from src.features.extraction.low_level_features_extractor import LowLevelFeatureExtractor
from src.features.extraction.high_level_features_extractor import HighLevelFeatureExtractor
from src.models.predict import predict

# Page-level configuration; this is the first Streamlit call in the script.
st.set_page_config(
    page_title="Audio Deepfake Detection",
    layout="wide",
)

# Stylesheet injected as raw HTML. It defines the page background plus the
# "title", "section-header" and "confidence-score" classes used by the
# headings rendered through st.markdown.
_PAGE_STYLES = """
    <style>
        .main {
            background-color: #f7f9fc;
        }
        .title {
            font-family: 'Courier New', Courier, monospace;
            color: #493628;
        }
        .section-header {
            font-size: 24px;
            font-weight: bold;
            margin-top: 10px;  /* Reduced margin to minimize vertical gap */
        }
        .confidence-score {
            font-size: 20px;
            font-weight: bold;
            color: #ff6f61;
        }
    </style>
"""
st.markdown(_PAGE_STYLES, unsafe_allow_html=True)

# Heading (styled through the "title" class) followed by a one-line summary.
_TITLE_HTML = '<h1 class="title">Audio Deepfake Detection</h1>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)
st.write("This application helps you detect whether an audio file is a deepfake or genuine.")

# Upload widget restricted to the listed audio extensions; the result is
# None until the user has picked a file.
_ACCEPTED_AUDIO_TYPES = ["wav", "mp3", "ogg"]
uploaded_file = st.file_uploader("Choose an audio file", type=_ACCEPTED_AUDIO_TYPES)

# Extract features from audio
def extract_features(audio_data, sample_rate):
    """Compute the feature table for a single audio clip.

    The clip is wrapped in a one-row DataFrame (columns ``audio_id``,
    ``audio_arr``, ``srate`` and ``real_or_fake``), passed through the
    low-level extractor, and the resulting stream is fed to the high-level
    extractor.

    Args:
        audio_data: Audio samples of the clip.
        sample_rate: Sampling rate that ``audio_data`` was decoded at.

    Returns:
        A ``pandas.DataFrame`` built from the items produced by the
        high-level feature generator.
    """
    clip_frame = pd.DataFrame(
        {
            'audio_id': [0],
            'audio_arr': [audio_data],
            'srate': [sample_rate],
            # Presumably a placeholder label expected by the extractor's
            # input schema -- confirm against LowLevelFeatureExtractor.
            'real_or_fake': [0],
        }
    )

    low_level_extractor = LowLevelFeatureExtractor(
        target_sr=16000,
        include_only=['spectral', 'prosodic', 'voice_quality'],
    )
    high_level_extractor = HighLevelFeatureExtractor()

    # Chain the two stages: the low-level output is consumed by the
    # high-level generator, which is then materialised into a list.
    low_level_stream = low_level_extractor.low_level_feature_generator(clip_frame)
    high_level_rows = list(
        high_level_extractor.high_level_feature_generator(low_level_stream)
    )

    return pd.DataFrame(high_level_rows)

# Plot waveform
def plot_waveform(audio_data, sample_rate):
    """Draw the waveform of an audio clip into the Streamlit page.

    Args:
        audio_data: Sequence of audio samples (assumed 1-D, one value per
            sample -- confirm against the caller).
        sample_rate: Number of samples per second; used to express the
            x-axis in seconds.

    Returns:
        None. The figure is rendered with ``st.pyplot`` and then closed.
    """
    # Sample i is located at i / sample_rate seconds, so the last sample sits
    # at (N - 1) / sample_rate rather than N / sample_rate.
    time_axis = np.arange(len(audio_data)) / sample_rate

    fig, ax = plt.subplots(figsize=(10, 2))  # Wide and short waveform plot
    try:
        ax.plot(time_axis, audio_data)
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("Amplitude")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is
        # explicitly closed; without this, each script rerun would leave
        # another figure in memory.
        plt.close(fig)

# Process the uploaded file: play it back, plot it, run the model and report
# the verdict together with the confidence in that verdict.
if uploaded_file is not None:
    # Use columns to display the audio player, waveform, prediction, and confidence side by side
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Uploaded Audio")
        st.audio(uploaded_file)

        # Show waveform
        st.subheader("Audio Waveform")
        # getvalue() returns the complete upload regardless of where the
        # buffer's read cursor currently is, unlike read().
        audio_bytes = uploaded_file.getvalue()
        # sr=None keeps the file's native sampling rate.
        audio_data, sample_rate = librosa.load(BytesIO(audio_bytes), sr=None)
        plot_waveform(audio_data, sample_rate)

    with col2:
        # Extract features
        features_df = extract_features(audio_data, sample_rate)

        predictions, prediction_probabilities = predict(features_df)

        # Display prediction and confidence score
        st.subheader("Prediction Results")

        prediction = predictions[0]
        is_deepfake = prediction == 1

        # Column 1 is taken to be the probability of the "deepfake" class
        # (label 1) -- confirm against src.models.predict. The confidence
        # shown must refer to the class that was actually predicted, so a
        # "genuine" verdict reports the complement of that probability.
        deepfake_probability = float(prediction_probabilities[0][1])
        confidence = deepfake_probability if is_deepfake else 1.0 - deepfake_probability
        # st.progress only accepts floats within [0.0, 1.0]; guard against
        # rounding noise pushing the value marginally outside that range.
        confidence = min(max(confidence, 0.0), 1.0)
        confidence_score = confidence * 100

        if is_deepfake:
            st.error("This audio is classified as a Deepfake!")
        else:
            st.success("This audio is classified as Genuine!")

        # Show confidence score using a progress bar
        st.markdown('<h3 class="confidence-score">Confidence Score</h3>', unsafe_allow_html=True)
        st.progress(confidence)

        st.write(f"The model is {confidence_score:.2f}% confident in its prediction.")

# Closing "How It Works" section; it sits outside the upload branch, so it is
# rendered whether or not a file has been provided.
_HOW_IT_WORKS_HEADING = '<h2 class="section-header">How It Works</h2>'
_HOW_IT_WORKS_BODY = """
This app uses machine learning models trained on various audio features, such as spectral, prosodic, and voice quality metrics.
It analyzes the audio to classify whether it is a genuine recording or a deepfake, providing a confidence score for its prediction.
"""

st.markdown(_HOW_IT_WORKS_HEADING, unsafe_allow_html=True)
st.write(_HOW_IT_WORKS_BODY)