fsoft-ai-center committed on
Commit
3bbc43f
·
verified ·
1 Parent(s): 11f42a2

Upload 13 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ logo.png filter=lfs diff=lfs merge=lfs -text
37
+ noisy_speech/EN_-3dB.wav filter=lfs diff=lfs merge=lfs -text
38
+ noisy_speech/EN_-6db.wav filter=lfs diff=lfs merge=lfs -text
39
+ noisy_speech/EN_+0dB.wav filter=lfs diff=lfs merge=lfs -text
40
+ noisy_speech/EN_+3dB.wav filter=lfs diff=lfs merge=lfs -text
41
+ noisy_speech/EN_+6dB.wav filter=lfs diff=lfs merge=lfs -text
42
+ noisy_speech/JA_-3dB.wav filter=lfs diff=lfs merge=lfs -text
43
+ noisy_speech/JA_-6dB.wav filter=lfs diff=lfs merge=lfs -text
44
+ noisy_speech/JA_+0dB.wav filter=lfs diff=lfs merge=lfs -text
45
+ noisy_speech/JA_+3dB.wav filter=lfs diff=lfs merge=lfs -text
46
+ noisy_speech/JA_+6dB.wav filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -13,7 +13,7 @@ from myrecorder import recorder
13
 
14
 
15
  SR = 16000
16
- CONTAINER_HEIGHT = 380
17
 
18
 
19
  def np_audio_to_bytesio(np_audio, np_audio_sr):
@@ -66,13 +66,17 @@ def wav_to_spec(wav, sr):
66
 
67
 
68
  def export_spec_to_buffer(spec):
69
- plt.rcParams['figure.figsize'] = (16, 4.5)
 
70
  plt.rc('axes', labelsize=15)
71
  plt.rc('xtick', labelsize=15)
72
  plt.rc('ytick', labelsize=15)
73
  librosa.display.specshow(spec, y_axis='log', x_axis='time')
74
  img_buffer = BytesIO()
 
 
75
  plt.savefig(img_buffer, format='JPEG', bbox_inches='tight', pad_inches=0)
 
76
  return img_buffer
77
 
78
 
@@ -95,86 +99,182 @@ def main():
95
  layout="wide"
96
  )
97
 
98
- logo_space, title_space, _ = st.columns([1, 5, 1], gap="small")
99
 
100
  with logo_space:
101
- st.write(
102
- """
103
- <div style="display: flex; justify-content: left;">
104
- <b><span style="text-align: center; color: #101414; font-size: 14px">FPT Corporation</span></b>
105
- </div>
106
- """,
107
- unsafe_allow_html=True
108
- )
109
- st.image('aic-logo.png')
110
 
111
  with title_space:
112
- st.image('logo.png')
113
 
114
- noisy_speech_files = load_noisy_speech()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- input_space, output_space = st.columns([1, 1], gap="medium")
117
- _, record_space, _, compute_space= st.columns([0.7, 1, 1, 1], gap="small")
118
-
119
- with record_space:
120
- record = recorder(
121
- start_prompt="Start Recording",
122
- stop_prompt="Stop Recording",
123
- just_once=False,
124
- use_container_width=False,
125
- format="wav",
126
- callback=None,
127
- args=(),
128
- kwargs={},
129
- key=None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  )
131
 
132
- with compute_space:
133
- compute = st.button('Denoise')
134
 
135
- with input_space.container(height=CONTAINER_HEIGHT, border=True):
136
- lang_select_space, snr_select_space = st.columns([1, 1], gap="small")
137
- with lang_select_space:
138
- language_select = st.selectbox("Language", list(noisy_speech_files.keys()))
139
- with snr_select_space:
140
- if language_select:
141
- snr_select = st.selectbox("SNR Level", list(noisy_speech_files[language_select].keys()))
 
142
 
143
- if record:
144
- wav_bytes_record = record['bytes']
145
- sr = record['sample_rate']
146
- noisy_wav_22k, noisy_wav = process_recorded_wav_bytes(wav_bytes_record, sr=22050)
147
- noisy_spec = wav_to_spec(noisy_wav_22k, sr=22050)
148
- noisy_spec_buff = export_spec_to_buffer(noisy_spec)
 
149
 
150
- st.audio(wav_bytes_record, format="wav")
151
- st.image(image=noisy_spec_buff)
152
-
153
- elif language_select and snr_select:
154
- audio_path = noisy_speech_files[language_select][snr_select]
155
- noisy_wav_22k, noisy_wav = load_wav(audio_path)
156
- noisy_spec = wav_to_spec(noisy_wav_22k, sr=22050)
157
- noisy_spec_buff = export_spec_to_buffer(noisy_spec)
158
 
159
- st.audio(audio_path, format="wav")
160
- st.image(image=noisy_spec_buff)
161
-
162
- with output_space.container(height=CONTAINER_HEIGHT, border=True):
163
- st.write(
164
- """
165
- <div style="display: flex; justify-content: center;">
166
- <b><span style="text-align: center; color: #808080; font-size: 51.5px">Output</span></b>
167
- </div>
168
- """,
169
- unsafe_allow_html=True
170
- )
171
- if noisy_wav.any() and compute:
172
- denoised_wav = denoise(noisy_wav)
173
- st.audio(denoised_wav, sample_rate=SR, format="audio/wav")
174
- denoised_spec = wav_to_spec(denoised_wav, sr=SR)
175
- denoised_spec_buff = export_spec_to_buffer(denoised_spec)
176
- st.image(image=denoised_spec_buff)
177
- record = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
 
180
  if __name__ == '__main__':
 
13
 
14
 
15
  SR = 16000
16
+ CONTAINER_HEIGHT = 340
17
 
18
 
19
  def np_audio_to_bytesio(np_audio, np_audio_sr):
 
66
 
67
 
68
  def export_spec_to_buffer(spec):
69
+ plt.clf()
70
+ plt.rcParams['figure.figsize'] = (16, 3.6)
71
  plt.rc('axes', labelsize=15)
72
  plt.rc('xtick', labelsize=15)
73
  plt.rc('ytick', labelsize=15)
74
  librosa.display.specshow(spec, y_axis='log', x_axis='time')
75
  img_buffer = BytesIO()
76
+ img_buffer.truncate(0) # Remove all contents
77
+ img_buffer.seek(0) # Reset the pointer to the start
78
  plt.savefig(img_buffer, format='JPEG', bbox_inches='tight', pad_inches=0)
79
+ plt.close('all')
80
  return img_buffer
81
 
82
 
 
99
  layout="wide"
100
  )
101
 
102
+ logo_space, title_space, _, tooltip_space = st.columns([2.03, 5, 1, 0.75], gap="small")
103
 
104
  with logo_space:
105
+ st.image('logo.png', width=48)
 
 
 
 
 
 
 
 
106
 
107
  with title_space:
108
+ st.image('title.png', width=640)
109
 
110
+ with tooltip_space:
111
+ st.markdown(
112
+ """
113
+ <style>
114
+ .tooltip {
115
+ position: relative;
116
+ display: inline-block;
117
+ cursor: pointer;
118
+ background-color: rgba(0, 76, 153, 1); /* Blue button color */
119
+ padding: 10px;
120
+ border-radius: 50%;
121
+ font-size: 16px;
122
+ font-weight: bold;
123
+ width: 40px;
124
+ height: 40px;
125
+ text-align: center;
126
+ line-height: 20px;
127
+ color: white; /* Text color */
128
+ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.2);
129
+ }
130
 
131
+ .tooltip .tooltiptext {
132
+ visibility: hidden;
133
+ width: 300px; /* Adjust width for readability */
134
+ background-color: #333; /* Dark background for contrast */
135
+ color: #fff;
136
+ text-align: left; /* Align text to the left */
137
+ border-radius: 8px;
138
+ padding: 15px; /* Add padding for spacing */
139
+ position: absolute;
140
+ z-index: 1;
141
+ top: 150%; /* Position below the button */
142
+ left: 50%;
143
+ transform: translateX(-50%);
144
+ opacity: 0;
145
+ transition: opacity 0.3s;
146
+ font-size: 14px;
147
+ line-height: 1.8; /* Adjust line height for readability */
148
+ white-space: normal; /* Allow wrapping of text */
149
+ }
150
+
151
+ .tooltip:hover .tooltiptext {
152
+ visibility: visible;
153
+ opacity: 1;
154
+ }
155
+ </style>
156
+ """,
157
+ unsafe_allow_html=True,
158
+ )
159
+
160
+ st.markdown(
161
+ """
162
+ <div class="tooltip">
163
+
164
+ <span class="tooltiptext">
165
+ <strong>Steps:</strong><br>
166
+ 1) Denoise your own speech: Click <em>Start recording</em>, then <em>Stop recording</em> when you are finished.<br>
167
+ 2) Click <em>"Denoise"</em> and wait for a few seconds.<br>
168
+ 3) Both the original audio and denoised audio will be available for playback.<br><br>
169
+ <strong>Note:</strong> Playing "noise" on your device while recording your speech to emulate speaking in a noisy environment will not work as intended. To do this emulation more realistically, play the noise on a different device (such as your phone) while recording your speech.
170
+ </span>
171
+ </div>
172
+ """,
173
+ unsafe_allow_html=True,
174
  )
175
 
176
+ tab1, tab2 = st.tabs(["📂Denoise our samples speech", "🎙️Denoise your own speech"])
 
177
 
178
+ with tab1:
179
+ noisy_speech_files = load_noisy_speech()
180
+
181
+ input_space_tab1, output_space_tab1 = st.columns([1, 1], gap="medium")
182
+ _, _, _, compute_space_tab1= st.columns([0.7, 1, 1, 1], gap="small")
183
+
184
+ with compute_space_tab1:
185
+ compute_tab1 = st.button('Denoise', key='denoise_tab1')
186
 
187
+ with input_space_tab1.container(height=CONTAINER_HEIGHT, border=True):
188
+ lang_select_space, snr_select_space = st.columns([1, 1], gap="small")
189
+ with lang_select_space:
190
+ language_select = st.selectbox("Language", list(noisy_speech_files.keys()))
191
+ with snr_select_space:
192
+ if language_select:
193
+ snr_select = st.selectbox("SNR Level", list(noisy_speech_files[language_select].keys()))
194
 
195
+ audio_path_tab1 = noisy_speech_files[language_select][snr_select]
196
+ noisy_wav_22k_tab1, noisy_wav_tab1 = load_wav(audio_path_tab1)
197
+ noisy_spec_tab1 = wav_to_spec(noisy_wav_22k_tab1, sr=22050)
198
+ noisy_spec_buff_tab1 = export_spec_to_buffer(noisy_spec_tab1)
 
 
 
 
199
 
200
+ st.audio(audio_path_tab1, format="wav")
201
+ st.image(image=noisy_spec_buff_tab1)
202
+
203
+ with output_space_tab1.container(height=CONTAINER_HEIGHT, border=True):
204
+ st.write(
205
+ """
206
+ <div style="display: flex; justify-content: center;">
207
+ <b><span style="text-align: center; color: #808080; font-size: 51.5px">Output</span></b>
208
+ </div>
209
+ """,
210
+ unsafe_allow_html=True
211
+ )
212
+ if noisy_wav_tab1.any() and compute_tab1:
213
+ with st.spinner("Denoising..."):
214
+ denoised_wav_tab1 = denoise(noisy_wav_tab1)
215
+ st.audio(denoised_wav_tab1, sample_rate=SR, format="audio/wav")
216
+ denoised_spec_tab1 = wav_to_spec(denoised_wav_tab1, sr=SR)
217
+ denoised_spec_buff_tab1 = export_spec_to_buffer(denoised_spec_tab1)
218
+ st.image(image=denoised_spec_buff_tab1)
219
+
220
+ with tab2:
221
+ input_space_tab2, output_space_tab2 = st.columns([1, 1], gap="medium")
222
+ _, record_space, _, compute_space_tab2 = st.columns([0.7, 1, 1, 1], gap="small")
223
+
224
+ with record_space:
225
+ record = recorder(
226
+ start_prompt="Start Recording",
227
+ stop_prompt="Stop Recording",
228
+ just_once=False,
229
+ use_container_width=False,
230
+ format="wav",
231
+ callback=None,
232
+ args=(),
233
+ kwargs={},
234
+ key="tab2_recorder"
235
+ )
236
+
237
+ with compute_space_tab2:
238
+ compute_tab2 = st.button('Denoise', key='denoise_tab2')
239
+
240
+ noisy_wav_tab2 = np.array([])
241
+ with input_space_tab2.container(height=CONTAINER_HEIGHT, border=True):
242
+ st.write(
243
+ """
244
+ <div style="display: flex; justify-content: center;">
245
+ <b><span style="text-align: center; color: #808080; font-size: 51.5px">Input</span></b>
246
+ </div>
247
+ """,
248
+ unsafe_allow_html=True
249
+ )
250
+
251
+ if record:
252
+ wav_bytes_record = record['bytes']
253
+ sr = record['sample_rate']
254
+ noisy_wav_22k_tab2, noisy_wav_tab2 = process_recorded_wav_bytes(wav_bytes_record, sr=22050)
255
+ noisy_spec_tab2 = wav_to_spec(noisy_wav_22k_tab2, sr=22050)
256
+ noisy_spec_buff_tab2 = export_spec_to_buffer(noisy_spec_tab2)
257
+
258
+ st.audio(wav_bytes_record, format="wav")
259
+ st.image(image=noisy_spec_buff_tab2)
260
+
261
+ with output_space_tab2.container(height=CONTAINER_HEIGHT, border=True):
262
+ st.write(
263
+ """
264
+ <div style="display: flex; justify-content: center;">
265
+ <b><span style="text-align: center; color: #808080; font-size: 51.5px">Output</span></b>
266
+ </div>
267
+ """,
268
+ unsafe_allow_html=True
269
+ )
270
+ if noisy_wav_tab2.any() and compute_tab2:
271
+ with st.spinner("Denoising..."):
272
+ denoised_wav_tab2 = denoise(noisy_wav_tab2)
273
+ st.audio(denoised_wav_tab2, sample_rate=SR, format="audio/wav")
274
+ denoised_spec_tab2 = wav_to_spec(denoised_wav_tab2, sr=SR)
275
+ denoised_spec_buff_tab2 = export_spec_to_buffer(denoised_spec_tab2)
276
+ st.image(image=denoised_spec_buff_tab2)
277
+ record = None
278
 
279
 
280
  if __name__ == '__main__':
logo.png CHANGED

Git LFS Details

  • SHA256: beeb8a9707775981c208c3aedd9541b9023888f2c32afaa772172ae65237be8b
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB
noisy_speech/EN_+0dB.wav CHANGED
Binary files a/noisy_speech/EN_+0dB.wav and b/noisy_speech/EN_+0dB.wav differ
 
noisy_speech/EN_+3dB.wav CHANGED
Binary files a/noisy_speech/EN_+3dB.wav and b/noisy_speech/EN_+3dB.wav differ
 
noisy_speech/EN_+6dB.wav CHANGED
Binary files a/noisy_speech/EN_+6dB.wav and b/noisy_speech/EN_+6dB.wav differ
 
noisy_speech/EN_-3dB.wav CHANGED
Binary files a/noisy_speech/EN_-3dB.wav and b/noisy_speech/EN_-3dB.wav differ
 
noisy_speech/EN_-6db.wav CHANGED
Binary files a/noisy_speech/EN_-6db.wav and b/noisy_speech/EN_-6db.wav differ
 
noisy_speech/JA_+0dB.wav CHANGED
Binary files a/noisy_speech/JA_+0dB.wav and b/noisy_speech/JA_+0dB.wav differ
 
noisy_speech/JA_+3dB.wav CHANGED
Binary files a/noisy_speech/JA_+3dB.wav and b/noisy_speech/JA_+3dB.wav differ
 
noisy_speech/JA_+6dB.wav CHANGED
Binary files a/noisy_speech/JA_+6dB.wav and b/noisy_speech/JA_+6dB.wav differ
 
noisy_speech/JA_-3dB.wav CHANGED
Binary files a/noisy_speech/JA_-3dB.wav and b/noisy_speech/JA_-3dB.wav differ
 
noisy_speech/JA_-6dB.wav CHANGED
Binary files a/noisy_speech/JA_-6dB.wav and b/noisy_speech/JA_-6dB.wav differ
 
title.png ADDED