Spaces:

luluw
/

Conformer-CTC-Small

Running

luluw committed on Apr 24

Commit

182ac63

verified ·

1 Parent(s): 2525207

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,6 +26,12 @@ def preprocess_audio(audio_file, featurizer, target_sample_rate=16000):
     Preprocess the audio: load, resample, and extract features.
     """
     try:
         waveform, sample_rate = torchaudio.load(audio_file)
         if sample_rate != target_sample_rate:
             waveform = Resample(orig_freq=sample_rate, new_freq=target_sample_rate)(waveform)
@@ -42,8 +48,8 @@ def decode_emission(emission, tokens, files):
             lm=files.lm,
             nbest=1,
             beam_size=100,
-            beam_threshold=75,
-            beam_size_token=30,
             lm_weight=LM_WEIGHT,
             word_score=WORD_SCORE,
         )
@@ -80,9 +86,8 @@ def launch_app(model_path, token_path="tokens.txt", share=False):
         inputs=gr.Audio(sources="microphone", type="filepath", label="Speak into the microphone"),
         outputs="text",
         title="Conformer-Small ASR Model",
-        description="""Conformer trained on Mozilla Corpus and LibriSpeech.<br>
-                   Training Script Available <a href="https://github.com/LuluW8071/Conformer" target="_blank">here</a><br>
-                   Experiment Results Available <a href="https://github.com/LuluW8071/Conformer/experiments" target="_blank">here</a>""",
     )
     interface.launch(share=share)

     Preprocess the audio: load, resample, and extract features.
     """
     try:
+        # Wait for file to be saved
+        wait_time = 0
+        while not os.path.exists(audio_file) and wait_time < 3:
+            time.sleep(0.1)
+            wait_time += 0.1
         waveform, sample_rate = torchaudio.load(audio_file)
         if sample_rate != target_sample_rate:
             waveform = Resample(orig_freq=sample_rate, new_freq=target_sample_rate)(waveform)
             lm=files.lm,
             nbest=1,
             beam_size=100,
+            beam_threshold=50,
+            beam_size_token=25,
             lm_weight=LM_WEIGHT,
             word_score=WORD_SCORE,
         )
         inputs=gr.Audio(sources="microphone", type="filepath", label="Speak into the microphone"),
         outputs="text",
         title="Conformer-Small ASR Model",
+        description="""<b>Trained on:</b> Mozilla Corpus, Personal Recordings, and LibriSpeech — 2900 hrs of audio data.<br>
+                       <b>Training Script and Experiment Results</b> available <a href="https://github.com/LuluW8071/Conformer" target="_blank">here</a>""",
     )
     interface.launch(share=share)