ojas121 committed on
Commit
3867db1
·
verified ·
1 Parent(s): e3f2196

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -12
app.py CHANGED
@@ -10,16 +10,52 @@ import wave
10
  import json
11
  from vosk import Model, KaldiRecognizer
12
  from transformers import pipeline
13
- from huggingface_hub import snapshot_download, login
14
  from pydub import AudioSegment
15
  import noisereduce as nr
16
 
17
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # ✅ Auto-Download Vosk Model (Speech-to-Text)
20
  VOSK_MODEL = "vosk-model-small-en-us-0.15"
21
  if not os.path.exists(VOSK_MODEL):
22
- st.write("Downloading Vosk Model...")
23
  subprocess.run(["wget", "-O", "vosk.zip", "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"])
24
  subprocess.run(["unzip", "vosk.zip"])
25
  subprocess.run(["rm", "vosk.zip"])
@@ -30,15 +66,15 @@ model = Model(VOSK_MODEL)
30
  # ✅ Auto-Download Wav2Vec2 Model (Emotion Detection)
31
  WAV2VEC_MODEL = "facebook/wav2vec2-large-xlsr-53"
32
  if not os.path.exists(WAV2VEC_MODEL):
33
- st.write(f"Downloading {WAV2VEC_MODEL}...")
34
  snapshot_download(repo_id=WAV2VEC_MODEL, local_dir=WAV2VEC_MODEL)
35
 
36
  # Load emotion detection model
37
  emotion_model = pipeline("audio-classification", model=WAV2VEC_MODEL)
38
 
39
  # ✅ Streamlit UI
40
- st.title("πŸŽ™οΈ Speech Detection System using Mozilla Common Voice")
41
- st.write("Upload an audio file and get real-time speech-to-text, noise filtering, and emotion analysis.")
42
 
43
  uploaded_file = st.file_uploader("Upload an MP3/WAV file", type=["mp3", "wav"])
44
 
@@ -58,12 +94,14 @@ if uploaded_file:
58
  # Load audio
59
  y, sr = librosa.load(file_path, sr=16000)
60
 
61
- # Display waveform
 
62
  fig, ax = plt.subplots(figsize=(10, 4))
63
  librosa.display.waveshow(y, sr=sr, ax=ax)
64
  st.pyplot(fig)
65
 
66
  # ✅ Noise Reduction
 
67
  y_denoised = nr.reduce_noise(y=y, sr=sr)
68
  denoised_path = file_path.replace(".wav", "_denoised.wav")
69
  sf.write(denoised_path, y_denoised, sr)
@@ -82,16 +120,18 @@ if uploaded_file:
82
  return result["text"]
83
 
84
  transcription = transcribe_audio(file_path)
85
- st.subheader("πŸ“ Transcribed Text:")
86
- st.write(transcription)
 
87
 
88
  # ✅ Emotion Detection
 
89
  emotion_result = emotion_model(file_path)
90
-
91
- st.subheader("😊 Emotion Analysis:")
92
  st.write(emotion_result)
93
 
94
  # ✅ Play Original & Denoised Audio
 
95
  st.audio(file_path, format="audio/wav", start_time=0)
96
- st.subheader("πŸ”Š Denoised Audio:")
 
97
  st.audio(denoised_path, format="audio/wav", start_time=0)
 
10
  import json
11
  from vosk import Model, KaldiRecognizer
12
  from transformers import pipeline
13
+ from huggingface_hub import snapshot_download
14
  from pydub import AudioSegment
15
  import noisereduce as nr
16
 
17
+ # 🎨 Apply Custom CSS Styling
18
+ st.markdown(
19
+ """
20
+ <style>
21
+ .stApp {
22
+ background-color: #f0f2f6;
23
+ }
24
+ .title {
25
+ font-size: 32px;
26
+ text-align: center;
27
+ color: #4A90E2;
28
+ font-weight: bold;
29
+ }
30
+ .subheader {
31
+ font-size: 20px;
32
+ font-weight: bold;
33
+ color: #333;
34
+ }
35
+ .stButton>button {
36
+ background-color: #4A90E2 !important;
37
+ color: white !important;
38
+ font-size: 18px !important;
39
+ padding: 10px 24px !important;
40
+ border-radius: 10px !important;
41
+ border: none !important;
42
+ }
43
+ .stAudio {
44
+ width: 100% !important;
45
+ }
46
+ .stMarkdown {
47
+ font-size: 16px;
48
+ color: #333;
49
+ }
50
+ </style>
51
+ """,
52
+ unsafe_allow_html=True
53
+ )
54
 
55
  # ✅ Auto-Download Vosk Model (Speech-to-Text)
56
  VOSK_MODEL = "vosk-model-small-en-us-0.15"
57
  if not os.path.exists(VOSK_MODEL):
58
+ st.write("πŸ“₯ Downloading Vosk Model...")
59
  subprocess.run(["wget", "-O", "vosk.zip", "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"])
60
  subprocess.run(["unzip", "vosk.zip"])
61
  subprocess.run(["rm", "vosk.zip"])
 
66
  # ✅ Auto-Download Wav2Vec2 Model (Emotion Detection)
67
  WAV2VEC_MODEL = "facebook/wav2vec2-large-xlsr-53"
68
  if not os.path.exists(WAV2VEC_MODEL):
69
+ st.write(f"πŸ“₯ Downloading {WAV2VEC_MODEL}...")
70
  snapshot_download(repo_id=WAV2VEC_MODEL, local_dir=WAV2VEC_MODEL)
71
 
72
  # Load emotion detection model
73
  emotion_model = pipeline("audio-classification", model=WAV2VEC_MODEL)
74
 
75
  # ✅ Streamlit UI
76
+ st.markdown("<div class='title'>πŸŽ™οΈ Speech Detection System</div>", unsafe_allow_html=True)
77
+ st.markdown("<div class='subheader'>πŸ” Upload an audio file for speech-to-text, noise filtering, and emotion analysis.</div>", unsafe_allow_html=True)
78
 
79
  uploaded_file = st.file_uploader("Upload an MP3/WAV file", type=["mp3", "wav"])
80
 
 
94
  # Load audio
95
  y, sr = librosa.load(file_path, sr=16000)
96
 
97
+ # 🎡 Display waveform
98
+ st.markdown("<div class='subheader'>🎼 Audio Waveform:</div>", unsafe_allow_html=True)
99
  fig, ax = plt.subplots(figsize=(10, 4))
100
  librosa.display.waveshow(y, sr=sr, ax=ax)
101
  st.pyplot(fig)
102
 
103
  # ✅ Noise Reduction
104
+ st.markdown("<div class='subheader'>πŸ”‡ Applying Noise Reduction...</div>", unsafe_allow_html=True)
105
  y_denoised = nr.reduce_noise(y=y, sr=sr)
106
  denoised_path = file_path.replace(".wav", "_denoised.wav")
107
  sf.write(denoised_path, y_denoised, sr)
 
120
  return result["text"]
121
 
122
  transcription = transcribe_audio(file_path)
123
+
124
+ st.markdown("<div class='subheader'>πŸ“ Transcribed Text:</div>", unsafe_allow_html=True)
125
+ st.markdown(f"<div class='stMarkdown'>{transcription}</div>", unsafe_allow_html=True)
126
 
127
  # ✅ Emotion Detection
128
+ st.markdown("<div class='subheader'>😊 Emotion Analysis:</div>", unsafe_allow_html=True)
129
  emotion_result = emotion_model(file_path)
 
 
130
  st.write(emotion_result)
131
 
132
  # ✅ Play Original & Denoised Audio
133
+ st.markdown("<div class='subheader'>πŸ”Š Play Audio:</div>", unsafe_allow_html=True)
134
  st.audio(file_path, format="audio/wav", start_time=0)
135
+
136
+ st.markdown("<div class='subheader'>πŸ”‡ Denoised Audio:</div>", unsafe_allow_html=True)
137
  st.audio(denoised_path, format="audio/wav", start_time=0)