Update app.py
app.py CHANGED
@@ -15,16 +15,28 @@ import time
 
 
 auth_token = os.environ.get('TOKEN')
-
+model1 = WhisperForConditionalGeneration.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                                          use_auth_token=auth_token)
 
-
+tokenizer1 = WhisperTokenizer.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                               use_auth_token=auth_token)
 
-
+feat_ext1 = WhisperFeatureExtractor.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                                     use_auth_token=auth_token)
 
-
+
+model2 = WhisperForConditionalGeneration.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                                         use_auth_token=auth_token)
+
+tokenizer2 = WhisperTokenizer.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                              use_auth_token=auth_token)
+
+feat_ext2 = WhisperFeatureExtractor.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                                    use_auth_token=auth_token)
+
+
+p1 = pipeline('automatic-speech-recognition', model=model1, tokenizer=tokenizer1, feature_extractor=feat_ext1)
+p2 = pipeline('automatic-speech-recognition', model=model2, tokenizer=tokenizer2, feature_extractor=feat_ext2)
 
 def transcribe(mic_input, upl_input, model_type):
     if mic_input:
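A note on this hunk: it loads each private Whisper checkpoint in three pieces (model, tokenizer, feature extractor) and wires each trio into a transformers ASR pipeline. Below is a minimal sketch of the same pattern, substituting the public openai/whisper-small checkpoint for the two private "rohitp1/..." repos so it runs without an auth token.

import os
from transformers import (WhisperForConditionalGeneration, WhisperTokenizer,
                          WhisperFeatureExtractor, pipeline)

# Stand-in for the private checkpoints used in the Space.
checkpoint = "openai/whisper-small"
# For private repos, pass the token from the environment as the commit does;
# None is fine for public checkpoints.
auth_token = os.environ.get('TOKEN')

model = WhisperForConditionalGeneration.from_pretrained(checkpoint, use_auth_token=auth_token)
tokenizer = WhisperTokenizer.from_pretrained(checkpoint, use_auth_token=auth_token)
feat_ext = WhisperFeatureExtractor.from_pretrained(checkpoint, use_auth_token=auth_token)

# The resulting pipeline accepts a filepath (or numpy array) and returns
# a dict with a "text" key.
asr = pipeline('automatic-speech-recognition', model=model,
               tokenizer=tokenizer, feature_extractor=feat_ext)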
@@ -32,9 +44,12 @@ def transcribe(mic_input, upl_input, model_type):
     else:
         audio = upl_input
     time.sleep(3)
-
+    if model_type =='Finetuned':
+        text = p2(audio)["text"]
+    else:
+        text = p1(audio)["text"]
     # state = text + " "
-    return text
+    return text
 
 
 
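With both pipelines in module scope, the new branch selects one by the dropdown string and reads the "text" field of the pipeline output. A condensed sketch of the same dispatch, assuming p1 and p2 are the two pipelines built earlier (the time.sleep(3) pacing call is left out):

def transcribe(mic_input, upl_input, model_type):
    # Gradio passes a filepath for both the microphone and the upload
    # input; prefer the mic recording when one exists.
    audio = mic_input if mic_input else upl_input
    # 'Finetuned' routes to the fine-tuned teacher pipeline,
    # anything else to the distilled student.
    pipe = p2 if model_type == 'Finetuned' else p1
    return pipe(audio)["text"]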
@@ -62,7 +77,7 @@ def transcribe(mic_input, upl_input, model_type):
     # demo.launch()
 
 def clear_inputs_and_outputs():
-    return [None, None,
+    return [None, None, "RobustDistillation", None]
 
 # Main function
 if __name__ == "__main__":
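The clear callback now also resets the dropdown to "RobustDistillation" alongside the two audio inputs and the transcription output; the returned list must line up, element for element, with whatever components the Clear button declares as outputs. A hypothetical wiring (mic_in, upl_in, and txt_out are placeholder names for the Space's actual Audio and Textbox components):

# Hypothetical Clear-button wiring; only clr_btn and model_type
# appear in the diff itself.
clr_btn.click(
    fn=clear_inputs_and_outputs,
    inputs=[],
    outputs=[mic_in, upl_in, model_type, txt_out],
)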
@@ -71,9 +86,9 @@ if __name__ == "__main__":
|
|
71 |
with demo:
|
72 |
gr.Markdown(
|
73 |
"""
|
74 |
-
<center><h1>English
|
75 |
-
This space is a demo of an English
|
76 |
-
In this space, you can record your voice or upload a wav file and the model will predict the
|
77 |
"""
|
78 |
)
|
79 |
with gr.Row():
|
@@ -85,7 +100,7 @@ if __name__ == "__main__":
|
|
85 |
)
|
86 |
|
87 |
with gr.Row():
|
88 |
-
model_type = gr.inputs.Dropdown(["
|
89 |
|
90 |
with gr.Row():
|
91 |
clr_btn = gr.Button(value="Clear", variant="secondary")
|
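Note that gr.inputs.Dropdown comes from Gradio's legacy pre-3.0 namespace, which newer Gradio releases drop. On current Gradio the equivalent component is gr.Dropdown, which also takes an initial value, so the UI can start out consistent with what clear_inputs_and_outputs resets it to. A sketch:

# Equivalent component on current Gradio; `value` is an assumed default,
# chosen to match the reset value in clear_inputs_and_outputs.
model_type = gr.Dropdown(
    choices=["RobustDistillation", "Finetuned"],
    value="RobustDistillation",
    label="Model Type",
)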
@@ -106,9 +121,8 @@ if __name__ == "__main__":
|
|
106 |
gr.Markdown(
|
107 |
"""
|
108 |
<h4>Credits</h4>
|
109 |
-
Author: <
|
110 |
-
|
111 |
-
Check out the model <a href="https://huggingface.co/keras-io/english-speaker-accent-recognition-using-transfer-learning">here</a>
|
112 |
"""
|
113 |
)
|
114 |
|
|