Spaces:

fsoft-ai-center
/

Speech-Enhancement

Running

App Files Files Community

fsoft-ai-center committed on Feb 10

Commit

3bbc43f

verified ·

1 Parent(s): 11f42a2

Upload 13 files

Browse files

Files changed (14) hide show

.gitattributes +11 -0
app.py +170 -70
logo.png +0 -0
noisy_speech/EN_+0dB.wav +0 -0
noisy_speech/EN_+3dB.wav +0 -0
noisy_speech/EN_+6dB.wav +0 -0
noisy_speech/EN_-3dB.wav +0 -0
noisy_speech/EN_-6db.wav +0 -0
noisy_speech/JA_+0dB.wav +0 -0
noisy_speech/JA_+3dB.wav +0 -0
noisy_speech/JA_+6dB.wav +0 -0
noisy_speech/JA_-3dB.wav +0 -0
noisy_speech/JA_-6dB.wav +0 -0
title.png +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+logo.png filter=lfs diff=lfs merge=lfs -text
+noisy_speech/EN_-3dB.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/EN_-6db.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/EN_+0dB.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/EN_+3dB.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/EN_+6dB.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/JA_-3dB.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/JA_-6dB.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/JA_+0dB.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/JA_+3dB.wav filter=lfs diff=lfs merge=lfs -text
+noisy_speech/JA_+6dB.wav filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from myrecorder import recorder
 SR = 16000
-CONTAINER_HEIGHT = 380
 def np_audio_to_bytesio(np_audio, np_audio_sr):
@@ -66,13 +66,17 @@ def wav_to_spec(wav, sr):
 def export_spec_to_buffer(spec):
-    plt.rcParams['figure.figsize'] = (16, 4.5)
     plt.rc('axes', labelsize=15)
     plt.rc('xtick', labelsize=15)
     plt.rc('ytick', labelsize=15)
     librosa.display.specshow(spec, y_axis='log', x_axis='time')
     img_buffer = BytesIO()
     plt.savefig(img_buffer, format='JPEG', bbox_inches='tight', pad_inches=0)
     return img_buffer
@@ -95,86 +99,182 @@ def main():
         layout="wide"
     )
-    logo_space, title_space, _ = st.columns([1, 5, 1], gap="small")
     with logo_space:
-        st.write(
-            """
-            <div style="display: flex; justify-content: left;">
-                <b><span style="text-align: center; color: #101414; font-size: 14px">FPT Corporation</span></b>
-            </div>
-            """,
-            unsafe_allow_html=True
-        )
-        st.image('aic-logo.png')
     with title_space:
-        st.image('logo.png')
-    noisy_speech_files = load_noisy_speech()
-    input_space, output_space = st.columns([1, 1], gap="medium")
-    _, record_space, _, compute_space= st.columns([0.7, 1, 1, 1], gap="small")
-    with record_space:
-        record = recorder(
-            start_prompt="Start Recording",
-            stop_prompt="Stop Recording",
-            just_once=False,
-            use_container_width=False,
-            format="wav",
-            callback=None,
-            args=(),
-            kwargs={},
-            key=None
         )
-    with compute_space:
-        compute = st.button('Denoise')
-    with input_space.container(height=CONTAINER_HEIGHT, border=True):
-        lang_select_space, snr_select_space = st.columns([1, 1], gap="small")
-        with lang_select_space:
-            language_select = st.selectbox("Language", list(noisy_speech_files.keys()))
-        with snr_select_space:
-            if language_select:
-                snr_select = st.selectbox("SNR Level", list(noisy_speech_files[language_select].keys()))
-        if record:
-            wav_bytes_record = record['bytes']
-            sr = record['sample_rate']
-            noisy_wav_22k, noisy_wav = process_recorded_wav_bytes(wav_bytes_record, sr=22050)
-            noisy_spec = wav_to_spec(noisy_wav_22k, sr=22050)
-            noisy_spec_buff = export_spec_to_buffer(noisy_spec)
-            st.audio(wav_bytes_record, format="wav")
-            st.image(image=noisy_spec_buff)
-        elif language_select and snr_select:
-            audio_path = noisy_speech_files[language_select][snr_select]
-            noisy_wav_22k, noisy_wav = load_wav(audio_path)
-            noisy_spec = wav_to_spec(noisy_wav_22k, sr=22050)
-            noisy_spec_buff = export_spec_to_buffer(noisy_spec)
-            st.audio(audio_path, format="wav")
-            st.image(image=noisy_spec_buff)
-    with output_space.container(height=CONTAINER_HEIGHT, border=True):
-        st.write(
-            """
-            <div style="display: flex; justify-content: center;">
-                <b><span style="text-align: center; color: #808080; font-size: 51.5px">Output</span></b>
-            </div>
-            """,
-            unsafe_allow_html=True
-        )
-        if noisy_wav.any() and compute:
-            denoised_wav = denoise(noisy_wav)
-            st.audio(denoised_wav, sample_rate=SR, format="audio/wav")
-            denoised_spec = wav_to_spec(denoised_wav, sr=SR)
-            denoised_spec_buff = export_spec_to_buffer(denoised_spec)
-            st.image(image=denoised_spec_buff)
-            record = None
 if __name__ == '__main__':

 SR = 16000
+CONTAINER_HEIGHT = 340
 def np_audio_to_bytesio(np_audio, np_audio_sr):
 def export_spec_to_buffer(spec):
+    plt.clf()
+    plt.rcParams['figure.figsize'] = (16, 3.6)
     plt.rc('axes', labelsize=15)
     plt.rc('xtick', labelsize=15)
     plt.rc('ytick', labelsize=15)
     librosa.display.specshow(spec, y_axis='log', x_axis='time')
     img_buffer = BytesIO()
+    img_buffer.truncate(0)  # Remove all contents
+    img_buffer.seek(0)  # Reset the pointer to the start
     plt.savefig(img_buffer, format='JPEG', bbox_inches='tight', pad_inches=0)
+    plt.close('all')
     return img_buffer
         layout="wide"
     )
+    logo_space, title_space, _, tooltip_space = st.columns([2.03, 5, 1, 0.75], gap="small")
     with logo_space:
+        st.image('logo.png', width=48)
     with title_space:
+        st.image('title.png', width=640)
+    with tooltip_space:
+        st.markdown(
+            """
+            <style>
+            .tooltip {
+                position: relative;
+                display: inline-block;
+                cursor: pointer;
+                background-color: rgba(0, 76, 153, 1); /* Blue button color */
+                padding: 10px;
+                border-radius: 50%;
+                font-size: 16px;
+                font-weight: bold;
+                width: 40px;
+                height: 40px;
+                text-align: center;
+                line-height: 20px;
+                color: white; /* Text color */
+                box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.2);
+            }
+            .tooltip .tooltiptext {
+                visibility: hidden;
+                width: 300px; /* Adjust width for readability */
+                background-color: #333; /* Dark background for contrast */
+                color: #fff;
+                text-align: left; /* Align text to the left */
+                border-radius: 8px;
+                padding: 15px; /* Add padding for spacing */
+                position: absolute;
+                z-index: 1;
+                top: 150%; /* Position below the button */
+                left: 50%;
+                transform: translateX(-50%);
+                opacity: 0;
+                transition: opacity 0.3s;
+                font-size: 14px;
+                line-height: 1.8; /* Adjust line height for readability */
+                white-space: normal; /* Allow wrapping of text */
+            }
+            .tooltip:hover .tooltiptext {
+                visibility: visible;
+                opacity: 1;
+            }
+            </style>
+            """,
+            unsafe_allow_html=True,
+        )
+        st.markdown(
+            """
+            <div class="tooltip">
+                ℹ
+                <span class="tooltiptext">
+                <strong>Steps:</strong><br>
+                1) Denoise your own speech: Click <em>Start recording</em>, then <em>Stop recording</em> when you are finished.<br>
+                2) Click <em>"Denoise"</em> and wait for a few seconds.<br>
+                3) Both the original audio and denoised audio will be available for playback.<br><br>
+                <strong>Note:</strong> Playing "noise" on your device while recording your speech to emulate speaking in a noisy environment will not work as intended. To do this emulation more realistically, play the noise on a different device (such as your phone) while recording your speech.
+                </span>
+            </div>
+            """,
+            unsafe_allow_html=True,
         )
+    tab1, tab2 = st.tabs(["📂Denoise our samples speech", "🎙️Denoise your own speech"])
+    with tab1:
+        noisy_speech_files = load_noisy_speech()
+        input_space_tab1, output_space_tab1 = st.columns([1, 1], gap="medium")
+        _, _, _, compute_space_tab1= st.columns([0.7, 1, 1, 1], gap="small")
+        with compute_space_tab1:
+            compute_tab1 = st.button('Denoise', key='denoise_tab1')
+        with input_space_tab1.container(height=CONTAINER_HEIGHT, border=True):
+            lang_select_space, snr_select_space = st.columns([1, 1], gap="small")
+            with lang_select_space:
+                language_select = st.selectbox("Language", list(noisy_speech_files.keys()))
+            with snr_select_space:
+                if language_select:
+                    snr_select = st.selectbox("SNR Level", list(noisy_speech_files[language_select].keys()))
+            audio_path_tab1 = noisy_speech_files[language_select][snr_select]
+            noisy_wav_22k_tab1, noisy_wav_tab1 = load_wav(audio_path_tab1)
+            noisy_spec_tab1 = wav_to_spec(noisy_wav_22k_tab1, sr=22050)
+            noisy_spec_buff_tab1 = export_spec_to_buffer(noisy_spec_tab1)
+            st.audio(audio_path_tab1, format="wav")
+            st.image(image=noisy_spec_buff_tab1)
+        with output_space_tab1.container(height=CONTAINER_HEIGHT, border=True):
+            st.write(
+                """
+                <div style="display: flex; justify-content: center;">
+                    <b><span style="text-align: center; color: #808080; font-size: 51.5px">Output</span></b>
+                </div>
+                """,
+                unsafe_allow_html=True
+            )
+            if noisy_wav_tab1.any() and compute_tab1:
+                with st.spinner("Denoising..."):
+                    denoised_wav_tab1 = denoise(noisy_wav_tab1)
+                st.audio(denoised_wav_tab1, sample_rate=SR, format="audio/wav")
+                denoised_spec_tab1 = wav_to_spec(denoised_wav_tab1, sr=SR)
+                denoised_spec_buff_tab1 = export_spec_to_buffer(denoised_spec_tab1)
+                st.image(image=denoised_spec_buff_tab1)
+    with tab2:
+        input_space_tab2, output_space_tab2 = st.columns([1, 1], gap="medium")
+        _, record_space, _, compute_space_tab2 = st.columns([0.7, 1, 1, 1], gap="small")
+        with record_space:
+            record = recorder(
+                start_prompt="Start Recording",
+                stop_prompt="Stop Recording",
+                just_once=False,
+                use_container_width=False,
+                format="wav",
+                callback=None,
+                args=(),
+                kwargs={},
+                key="tab2_recorder"
+            )
+        with compute_space_tab2:
+            compute_tab2 = st.button('Denoise', key='denoise_tab2')
+        noisy_wav_tab2 = np.array([])
+        with input_space_tab2.container(height=CONTAINER_HEIGHT, border=True):
+            st.write(
+                """
+                <div style="display: flex; justify-content: center;">
+                    <b><span style="text-align: center; color: #808080; font-size: 51.5px">Input</span></b>
+                </div>
+                """,
+                unsafe_allow_html=True
+            )
+            if record:
+                wav_bytes_record = record['bytes']
+                sr = record['sample_rate']
+                noisy_wav_22k_tab2, noisy_wav_tab2 = process_recorded_wav_bytes(wav_bytes_record, sr=22050)
+                noisy_spec_tab2 = wav_to_spec(noisy_wav_22k_tab2, sr=22050)
+                noisy_spec_buff_tab2 = export_spec_to_buffer(noisy_spec_tab2)
+                st.audio(wav_bytes_record, format="wav")
+                st.image(image=noisy_spec_buff_tab2)
+        with output_space_tab2.container(height=CONTAINER_HEIGHT, border=True):
+            st.write(
+                """
+                <div style="display: flex; justify-content: center;">
+                    <b><span style="text-align: center; color: #808080; font-size: 51.5px">Output</span></b>
+                </div>
+                """,
+                unsafe_allow_html=True
+            )
+            if noisy_wav_tab2.any() and compute_tab2:
+                with st.spinner("Denoising..."):
+                    denoised_wav_tab2 = denoise(noisy_wav_tab2)
+                st.audio(denoised_wav_tab2, sample_rate=SR, format="audio/wav")
+                denoised_spec_tab2 = wav_to_spec(denoised_wav_tab2, sr=SR)
+                denoised_spec_buff_tab2 = export_spec_to_buffer(denoised_spec_tab2)
+                st.image(image=denoised_spec_buff_tab2)
+                record = None
 if __name__ == '__main__':

logo.png CHANGED Viewed

Git LFS Details

SHA256: beeb8a9707775981c208c3aedd9541b9023888f2c32afaa772172ae65237be8b
Pointer size: 131 Bytes
Size of remote file: 111 kB

noisy_speech/EN_+0dB.wav CHANGED Viewed

Binary files a/noisy_speech/EN_+0dB.wav and b/noisy_speech/EN_+0dB.wav differ

noisy_speech/EN_+3dB.wav CHANGED Viewed

Binary files a/noisy_speech/EN_+3dB.wav and b/noisy_speech/EN_+3dB.wav differ

noisy_speech/EN_+6dB.wav CHANGED Viewed

Binary files a/noisy_speech/EN_+6dB.wav and b/noisy_speech/EN_+6dB.wav differ

noisy_speech/EN_-3dB.wav CHANGED Viewed

Binary files a/noisy_speech/EN_-3dB.wav and b/noisy_speech/EN_-3dB.wav differ

noisy_speech/EN_-6db.wav CHANGED Viewed

Binary files a/noisy_speech/EN_-6db.wav and b/noisy_speech/EN_-6db.wav differ

noisy_speech/JA_+0dB.wav CHANGED Viewed

Binary files a/noisy_speech/JA_+0dB.wav and b/noisy_speech/JA_+0dB.wav differ

noisy_speech/JA_+3dB.wav CHANGED Viewed

Binary files a/noisy_speech/JA_+3dB.wav and b/noisy_speech/JA_+3dB.wav differ

noisy_speech/JA_+6dB.wav CHANGED Viewed

Binary files a/noisy_speech/JA_+6dB.wav and b/noisy_speech/JA_+6dB.wav differ

noisy_speech/JA_-3dB.wav CHANGED Viewed

Binary files a/noisy_speech/JA_-3dB.wav and b/noisy_speech/JA_-3dB.wav differ

noisy_speech/JA_-6dB.wav CHANGED Viewed

Binary files a/noisy_speech/JA_-6dB.wav and b/noisy_speech/JA_-6dB.wav differ

title.png ADDED Viewed