Mrkomiljon committed on
Commit
59b5f81
·
verified ·
1 Parent(s): affa9ca

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import librosa
3
+ import numpy as np
4
+ import onnxruntime as ort
5
+
6
+ # Audio padding function
7
def pad(x, max_len=64600):
    """
    Pad or trim a 1-D audio signal to exactly ``max_len`` samples.

    Longer signals are truncated. Shorter signals are repeated end-to-end
    until they fill the target length. An empty signal has nothing to repeat,
    so it is padded with zeros instead of failing with a division by zero.

    Args:
        x: 1-D array-like of audio samples.
        max_len: Target number of samples.

    Returns:
        A 1-D NumPy array holding ``max_len`` samples.
    """
    x = np.asarray(x)
    x_len = x.shape[0]
    if x_len >= max_len:
        return x[:max_len]  # Trim if longer

    if x_len == 0:
        # Nothing to tile; fall back to zeros of the same dtype.
        return np.zeros(max_len, dtype=x.dtype)

    # Ceiling division: the smallest repeat count that covers max_len.
    num_repeats = -(-max_len // x_len)
    return np.tile(x, num_repeats)[:max_len]
18
+
19
+ # Preprocess audio for a single segment
20
def preprocess_audio_segment(segment, cut=64600):
    """
    Prepare one audio segment for inference.

    The segment is first brought to ``cut`` samples with ``pad``, then copied
    into a float32 array and given a leading batch axis.
    """
    fixed_length = pad(segment, max_len=cut)
    samples = np.array(fixed_length, dtype=np.float32)
    return samples[np.newaxis, ...]  # Prepend the batch dimension
26
+
27
+ # Sliding window prediction function
28
def predict_with_sliding_window(audio_path, onnx_model_url, window_size=64600, step_size=64600, sample_rate=16000):
    """
    Classify an audio file as "Real" or "Fake" by voting over sliding windows.

    The waveform is cut into windows of ``window_size`` samples, advancing by
    ``step_size`` samples each time. Every window is preprocessed, run through
    the ONNX model and labelled; the final label is the majority vote.

    Args:
        audio_path: Path of the audio file, loaded with ``librosa.load``.
        onnx_model_url: Passed straight to ``onnxruntime.InferenceSession``.
            Despite the name, this should be something the session can open
            (a local model path or serialized model bytes), not an HTTP URL.
        window_size: Number of samples sliced out for each window.
        step_size: Number of samples between consecutive window starts.
        sample_rate: Sampling rate the audio is resampled to on load.

    Returns:
        A ``(majority_class, avg_probability)`` tuple. ``majority_class`` is
        ``"Real"`` or ``"Fake"``; a tie resolves to ``"Fake"``.
        ``avg_probability`` is the mean, in percent, of each window's
        probability for its own predicted class (not only for the windows
        that agree with the majority).

    Raises:
        ValueError: If the audio file contains no samples.
    """
    # Load ONNX runtime session
    ort_session = ort.InferenceSession(onnx_model_url)
    # The input name does not change between windows, so look it up once.
    input_name = ort_session.get_inputs()[0].name

    # Load audio file
    waveform, _ = librosa.load(audio_path, sr=sample_rate)
    if len(waveform) == 0:
        raise ValueError(f"No audio samples could be read from {audio_path!r}")

    total_segments = []
    total_probabilities = []

    # Sliding window processing
    for start in range(0, len(waveform), step_size):
        segment = waveform[start:start + window_size]

        # NOTE(review): the segment is padded/trimmed to the default length of
        # preprocess_audio_segment, not to window_size - confirm this is
        # intended before calling with a non-default window_size.
        audio_tensor = preprocess_audio_segment(segment)

        # Perform inference
        outputs = ort_session.run(None, {input_name: audio_tensor})
        # Assumes the model emits log-probabilities, so exp() recovers
        # probabilities - TODO confirm against the exported model.
        probabilities = np.exp(outputs[0])
        prediction = np.argmax(probabilities)

        # Store the results
        total_segments.append("Real" if prediction == 1 else "Fake")
        total_probabilities.append(probabilities[0][prediction])

    # Majority voting with explicit counts. Iterating a set of strings to
    # break ties is order-dependent and can vary between runs, so a tie is
    # resolved to "Fake" instead.
    real_votes = total_segments.count("Real")
    fake_votes = len(total_segments) - real_votes
    majority_class = "Real" if real_votes > fake_votes else "Fake"

    # Average probability in percentage
    avg_probability = float(np.mean(total_probabilities)) * 100

    return majority_class, avg_probability
64
+
65
+ # Streamlit app
66
import os
import tempfile
import urllib.request

st.title("Audio Spoof Detection with ONNX Model")
st.write("Upload an audio file to detect if it is Real or Fake.")

# File uploader
uploaded_file = st.file_uploader("Upload your audio file (WAV or MP3)", type=["wav", "mp3"])

if uploaded_file is not None:
    # ONNX Runtime cannot open an HTTP URL, and the Hub's "/blob/" address
    # serves an HTML page rather than the model. Download the raw file from
    # "/resolve/" once and reuse the local copy afterwards.
    onnx_model_url = "https://huggingface.co/Mrkomiljon/DeepVoiceGuard/resolve/main/RawNet_model.onnx"
    model_path = os.path.join(tempfile.gettempdir(), "RawNet_model.onnx")

    if not os.path.exists(model_path):
        with st.spinner("Downloading model..."):
            # Download to a unique partial file and rename it when complete,
            # so an interrupted download is never mistaken for a cached model.
            fd, partial_path = tempfile.mkstemp(suffix=".onnx.part", dir=tempfile.gettempdir())
            os.close(fd)
            try:
                urllib.request.urlretrieve(onnx_model_url, partial_path)
                os.replace(partial_path, model_path)
            finally:
                if os.path.exists(partial_path):
                    os.remove(partial_path)

    # Save the upload under a unique name so concurrent sessions cannot
    # overwrite each other's audio, keeping the original extension.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        temp_audio_path = tmp.name

    try:
        # Perform prediction
        with st.spinner("Processing..."):
            result, avg_probability = predict_with_sliding_window(temp_audio_path, model_path)

        # Display results
        st.success(f"Prediction: {result}")
        st.info(f"Confidence: {avg_probability:.2f}%")
    finally:
        # Clean up the temporary file even if prediction failed
        os.remove(temp_audio_path)