ojas121 committed
Commit a9f8ee6 · verified · 1 Parent(s): 17a00d3

Update app.py

Files changed (1): app.py (+24, -24)
app.py CHANGED
@@ -1,3 +1,5 @@
+import os
+import subprocess
 import streamlit as st
 import librosa
 import librosa.display
@@ -8,32 +10,31 @@ import wave
 import json
 from vosk import Model, KaldiRecognizer
 from transformers import pipeline
-import os
+from huggingface_hub import snapshot_download
 from pydub import AudioSegment
 import noisereduce as nr
 
-import streamlit as st
-import subprocess
-
-try:
-    import librosa
-    st.write("✅ Librosa is installed successfully!")
-except ImportError:
-    st.write("❌ Librosa is missing! Installing now...")
-    subprocess.run(["pip", "install", "librosa"])
-    import librosa
-    st.write("✅ Librosa installed successfully!")
+# ✅ Auto-Download Vosk Model (Speech-to-Text)
+VOSK_MODEL = "vosk-model-small-en-us-0.15"
+if not os.path.exists(VOSK_MODEL):
+    st.write("Downloading Vosk Model...")
+    subprocess.run(["wget", "-O", "vosk.zip", "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"])
+    subprocess.run(["unzip", "vosk.zip"])
+    subprocess.run(["rm", "vosk.zip"])
 
+# Load Vosk model
+model = Model(VOSK_MODEL)
 
+# ✅ Auto-Download Wav2Vec2 Model (Emotion Detection)
+WAV2VEC_MODEL = "superb/wav2vec2-large-xlsr-53"
+if not os.path.exists(WAV2VEC_MODEL):
+    st.write(f"Downloading {WAV2VEC_MODEL}...")
+    snapshot_download(repo_id=WAV2VEC_MODEL, local_dir=WAV2VEC_MODEL)
 
-# Load Vosk model
-MODEL_PATH = "vosk-model-small-en-us-0.15"
-if not os.path.exists(MODEL_PATH):
-    st.error("Vosk model not found! Please download and extract it.")
-    st.stop()
-model = Model(MODEL_PATH)
+# Load emotion detection model
+emotion_model = pipeline("audio-classification", model=WAV2VEC_MODEL)
 
-# Streamlit UI
+# ✅ Streamlit UI
 st.title("🎙️ Speech Detection System using Mozilla Common Voice")
 st.write("Upload an audio file and get real-time speech-to-text, noise filtering, and emotion analysis.")
 
@@ -60,12 +61,12 @@ if uploaded_file:
     librosa.display.waveshow(y, sr=sr, ax=ax)
     st.pyplot(fig)
 
-    # Noise Reduction
+    # ✅ Noise Reduction
     y_denoised = nr.reduce_noise(y=y, sr=sr)
    denoised_path = file_path.replace(".wav", "_denoised.wav")
     sf.write(denoised_path, y_denoised, sr)
 
-    # Speech-to-Text using Vosk
+    # ✅ Speech-to-Text using Vosk
     def transcribe_audio(audio_path):
         wf = wave.open(audio_path, "rb")
         rec = KaldiRecognizer(model, wf.getframerate())
@@ -82,14 +83,13 @@
     st.subheader("📝 Transcribed Text:")
     st.write(transcription)
 
-    # Emotion Detection
-    emotion_model = pipeline("audio-classification", model="superb/wav2vec2-large-xlsr-53")
+    # ✅ Emotion Detection
     emotion_result = emotion_model(file_path)
 
     st.subheader("😊 Emotion Analysis:")
     st.write(emotion_result)
 
-    # Play original and denoised audio
+    # ✅ Play Original & Denoised Audio
     st.audio(file_path, format="audio/wav", start_time=0)
     st.subheader("🔊 Denoised Audio:")
     st.audio(denoised_path, format="audio/wav", start_time=0)
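
Note on the new auto-download step: it shells out to wget, unzip, and rm, which assumes those binaries exist in the runtime image. A minimal stdlib-only sketch of the same idea (same model folder name and URL as in the diff; illustrative, not part of the commit):

import os
import urllib.request
import zipfile

VOSK_MODEL = "vosk-model-small-en-us-0.15"
# URL taken from the diff above
VOSK_URL = "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"

if not os.path.exists(VOSK_MODEL):
    # Download the zipped model, unpack it next to app.py, then remove the archive
    urllib.request.urlretrieve(VOSK_URL, "vosk.zip")
    with zipfile.ZipFile("vosk.zip") as zf:
        zf.extractall(".")
    os.remove("vosk.zip")

This variant avoids depending on external wget/unzip binaries being installed.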