Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,12 @@ def preprocess_audio(audio_file, featurizer, target_sample_rate=16000):
|
|
26 |
Preprocess the audio: load, resample, and extract features.
|
27 |
"""
|
28 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
waveform, sample_rate = torchaudio.load(audio_file)
|
30 |
if sample_rate != target_sample_rate:
|
31 |
waveform = Resample(orig_freq=sample_rate, new_freq=target_sample_rate)(waveform)
|
@@ -42,8 +48,8 @@ def decode_emission(emission, tokens, files):
|
|
42 |
lm=files.lm,
|
43 |
nbest=1,
|
44 |
beam_size=100,
|
45 |
-
beam_threshold=
|
46 |
-
beam_size_token=
|
47 |
lm_weight=LM_WEIGHT,
|
48 |
word_score=WORD_SCORE,
|
49 |
)
|
@@ -80,9 +86,8 @@ def launch_app(model_path, token_path="tokens.txt", share=False):
|
|
80 |
inputs=gr.Audio(sources="microphone", type="filepath", label="Speak into the microphone"),
|
81 |
outputs="text",
|
82 |
title="Conformer-Small ASR Model",
|
83 |
-
description="""
|
84 |
-
|
85 |
-
Experiment Results Available <a href="https://github.com/LuluW8071/Conformer/experiments" target="_blank">here</a>""",
|
86 |
)
|
87 |
|
88 |
interface.launch(share=share)
|
|
|
26 |
Preprocess the audio: load, resample, and extract features.
|
27 |
"""
|
28 |
try:
|
29 |
+
# Wait for file to be saved
|
30 |
+
wait_time = 0
|
31 |
+
while not os.path.exists(audio_file) and wait_time < 3:
|
32 |
+
time.sleep(0.1)
|
33 |
+
wait_time += 0.1
|
34 |
+
|
35 |
waveform, sample_rate = torchaudio.load(audio_file)
|
36 |
if sample_rate != target_sample_rate:
|
37 |
waveform = Resample(orig_freq=sample_rate, new_freq=target_sample_rate)(waveform)
|
|
|
48 |
lm=files.lm,
|
49 |
nbest=1,
|
50 |
beam_size=100,
|
51 |
+
beam_threshold=50,
|
52 |
+
beam_size_token=25,
|
53 |
lm_weight=LM_WEIGHT,
|
54 |
word_score=WORD_SCORE,
|
55 |
)
|
|
|
86 |
inputs=gr.Audio(sources="microphone", type="filepath", label="Speak into the microphone"),
|
87 |
outputs="text",
|
88 |
title="Conformer-Small ASR Model",
|
89 |
+
description="""<b>Trained on:</b> Mozilla Corpus, Personal Recordings, and LibriSpeech — 2900 hrs of audio data.<br>
|
90 |
+
<b>Training Script and Experiment Results</b> available <a href="https://github.com/LuluW8071/Conformer" target="_blank">here</a>""",
|
|
|
91 |
)
|
92 |
|
93 |
interface.launch(share=share)
|