Update app.py
app.py CHANGED
@@ -15,16 +15,28 @@ import time
 
 
 auth_token = os.environ.get('TOKEN')
-
+model1 = WhisperForConditionalGeneration.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                                          use_auth_token=auth_token)
 
-
+tokenizer1 = WhisperTokenizer.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                               use_auth_token=auth_token)
 
-
+feat_ext1 = WhisperFeatureExtractor.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                                     use_auth_token=auth_token)
 
-
+
+model2 = WhisperForConditionalGeneration.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                                         use_auth_token=auth_token)
+
+tokenizer2 = WhisperTokenizer.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                              use_auth_token=auth_token)
+
+feat_ext2 = WhisperFeatureExtractor.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                                    use_auth_token=auth_token)
+
+
+p1 = pipeline('automatic-speech-recognition', model=model1, tokenizer=tokenizer1, feature_extractor=feat_ext1)
+p2 = pipeline('automatic-speech-recognition', model=model2, tokenizer=tokenizer2, feature_extractor=feat_ext2)
 
 def transcribe(mic_input, upl_input, model_type):
     if mic_input:
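A note on this hunk: it loads each private Whisper checkpoint in three pieces (model, tokenizer, feature extractor) and wires each trio into a transformers ASR pipeline. Below is a minimal sketch of the same pattern, substituting the public openai/whisper-small checkpoint for the two private "rohitp1/..." repos so it runs without an auth token.

import os
from transformers import (WhisperForConditionalGeneration, WhisperTokenizer,
                          WhisperFeatureExtractor, pipeline)

# Stand-in for the private checkpoints used in the Space.
checkpoint = "openai/whisper-small"
# For private repos, pass the token from the environment as the commit does;
# None is fine for public checkpoints.
auth_token = os.environ.get('TOKEN')

model = WhisperForConditionalGeneration.from_pretrained(checkpoint, use_auth_token=auth_token)
tokenizer = WhisperTokenizer.from_pretrained(checkpoint, use_auth_token=auth_token)
feat_ext = WhisperFeatureExtractor.from_pretrained(checkpoint, use_auth_token=auth_token)

# The resulting pipeline accepts a filepath (or numpy array) and returns
# a dict with a "text" key.
asr = pipeline('automatic-speech-recognition', model=model,
               tokenizer=tokenizer, feature_extractor=feat_ext)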
@@ -32,9 +44,12 @@ def transcribe(mic_input, upl_input, model_type):
     else:
         audio = upl_input
     time.sleep(3)
-
+    if model_type =='Finetuned':
+        text = p2(audio)["text"]
+    else:
+        text = p1(audio)["text"]
     # state = text + " "
-    return text
+    return text
 
 
 
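With both pipelines in module scope, the new branch selects one by the dropdown string and reads the "text" field of the pipeline output. A condensed sketch of the same dispatch, assuming p1 and p2 are the two pipelines built earlier (the time.sleep(3) pacing call is left out):

def transcribe(mic_input, upl_input, model_type):
    # Gradio passes a filepath for both the microphone and the upload
    # input; prefer the mic recording when one exists.
    audio = mic_input if mic_input else upl_input
    # 'Finetuned' routes to the fine-tuned teacher pipeline,
    # anything else to the distilled student.
    pipe = p2 if model_type == 'Finetuned' else p1
    return pipe(audio)["text"]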
@@ -62,7 +77,7 @@ def transcribe(mic_input, upl_input, model_type):
     # demo.launch()
 
 def clear_inputs_and_outputs():
-    return [None, None,
+    return [None, None, "RobustDistillation", None]
 
 # Main function
 if __name__ == "__main__":
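The clear callback now also resets the dropdown to "RobustDistillation" alongside the two audio inputs and the transcription output; the returned list must line up, element for element, with whatever components the Clear button declares as outputs. A hypothetical wiring (mic_in, upl_in, and txt_out are placeholder names for the Space's actual Audio and Textbox components):

# Hypothetical Clear-button wiring; only clr_btn and model_type
# appear in the diff itself.
clr_btn.click(
    fn=clear_inputs_and_outputs,
    inputs=[],
    outputs=[mic_in, upl_in, model_type, txt_out],
)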
@@ -71,9 +86,9 @@ if __name__ == "__main__":
|
|
71 |
with demo:
|
72 |
gr.Markdown(
|
73 |
"""
|
74 |
-
<center><h1>English
|
75 |
-
This space is a demo of an English
|
76 |
-
In this space, you can record your voice or upload a wav file and the model will predict the
|
77 |
"""
|
78 |
)
|
79 |
with gr.Row():
|
@@ -85,7 +100,7 @@ if __name__ == "__main__":
|
|
85 |
)
|
86 |
|
87 |
with gr.Row():
|
88 |
-
model_type = gr.inputs.Dropdown(["
|
89 |
|
90 |
with gr.Row():
|
91 |
clr_btn = gr.Button(value="Clear", variant="secondary")
|
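Note that gr.inputs.Dropdown comes from Gradio's legacy pre-3.0 namespace, which newer Gradio releases drop. On current Gradio the equivalent component is gr.Dropdown, which also takes an initial value, so the UI can start out consistent with what clear_inputs_and_outputs resets it to. A sketch:

# Equivalent component on current Gradio; `value` is an assumed default,
# chosen to match the reset value in clear_inputs_and_outputs.
model_type = gr.Dropdown(
    choices=["RobustDistillation", "Finetuned"],
    value="RobustDistillation",
    label="Model Type",
)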
@@ -106,9 +121,8 @@ if __name__ == "__main__":
|
|
106 |
gr.Markdown(
|
107 |
"""
|
108 |
<h4>Credits</h4>
|
109 |
-
Author: <
|
110 |
-
|
111 |
-
Check out the model <a href="https://huggingface.co/keras-io/english-speaker-accent-recognition-using-transfer-learning">here</a>
|
112 |
"""
|
113 |
)
|
114 |
|
|