rohitp1 committed
Commit a864a25 · Parent(s): 120270c

Update app.py

Files changed (1)
  1. app.py +28 -14
app.py CHANGED
@@ -15,16 +15,28 @@ import time
 
 
 auth_token = os.environ.get('TOKEN')
-model = WhisperForConditionalGeneration.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
+model1 = WhisperForConditionalGeneration.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                     use_auth_token=auth_token)
 
-tokenizer = WhisperTokenizer.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
+tokenizer1 = WhisperTokenizer.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                     use_auth_token=auth_token)
 
-feat_ext = WhisperFeatureExtractor.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
+feat_ext1 = WhisperFeatureExtractor.from_pretrained("rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset",
                                     use_auth_token=auth_token)
 
-p = pipeline('automatic-speech-recognition', model=model, tokenizer=tokenizer, feature_extractor=feat_ext)
+
+model2 = WhisperForConditionalGeneration.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                    use_auth_token=auth_token)
+
+tokenizer2 = WhisperTokenizer.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                    use_auth_token=auth_token)
+
+feat_ext2 = WhisperFeatureExtractor.from_pretrained("rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8",
+                                    use_auth_token=auth_token)
+
+
+p1 = pipeline('automatic-speech-recognition', model=model1, tokenizer=tokenizer1, feature_extractor=feat_ext1)
+p2 = pipeline('automatic-speech-recognition', model=model2, tokenizer=tokenizer2, feature_extractor=feat_ext2)
 
 def transcribe(mic_input, upl_input, model_type):
     if mic_input:
@@ -32,9 +44,12 @@ def transcribe(mic_input, upl_input, model_type):
     else:
         audio = upl_input
     time.sleep(3)
-    text = p(audio)["text"]
+    if model_type == 'Finetuned':
+        text = p2(audio)["text"]
+    else:
+        text = p1(audio)["text"]
     # state = text + " "
-    return text + " " + model_type
+    return text
 
 
 
@@ -62,7 +77,7 @@ def transcribe(mic_input, upl_input, model_type):
 # demo.launch()
 
 def clear_inputs_and_outputs():
-    return [None, None, None, None]
+    return [None, None, "RobustDistillation", None]
 
 # Main function
 if __name__ == "__main__":
@@ -71,9 +86,9 @@ if __name__ == "__main__":
     with demo:
         gr.Markdown(
             """
-            <center><h1>English speaker accent recognition using Transfer Learning</h1></center> \
-            This space is a demo of an English (precisely UK & Ireland) accent classification model using Keras.<br> \
-            In this space, you can record your voice or upload a wav file and the model will predict the English accent spoken in the audio<br><br>
+            <center><h1>Noise Robust English Automatic Speech Recognition on the LibriSpeech Dataset</h1></center> \
+            This space is a demo of an English ASR model built with Hugging Face Transformers.<br> \
+            In this space, you can record your voice or upload a wav file and the model will predict the text spoken in the audio.<br><br>
             """
         )
         with gr.Row():
@@ -85,7 +100,7 @@ if __name__ == "__main__":
             )
 
         with gr.Row():
-            model_type = gr.inputs.Dropdown(["Noise-Robust-Libri", "Original-Model"], label='Model Type')
+            model_type = gr.inputs.Dropdown(["RobustDistillation", "Finetuned"], label='Model Type')
 
         with gr.Row():
             clr_btn = gr.Button(value="Clear", variant="secondary")
@@ -106,9 +121,8 @@ if __name__ == "__main__":
         gr.Markdown(
             """
            <h4>Credits</h4>
-            Author: <a href="https://twitter.com/fadibadine">Fadi Badine</a>.<br>
-            Based on the following Keras example <a href="https://keras.io/examples/audio/uk_ireland_accent_recognition">English speaker accent recognition using Transfer Learning</a> by Fadi Badine<br>
-            Check out the model <a href="https://huggingface.co/keras-io/english-speaker-accent-recognition-using-transfer-learning">here</a>
+            Author: Rohit Prasad<br>
+            Check out the model <a href="https://huggingface.co/rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset">here</a>
             """
         )
 
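
For context beyond the diff: the commit replaces the single pipeline `p` with two pipelines, `p1` (the distilled, noise-robust checkpoint) and `p2` (the fine-tuned teacher checkpoint), and routes between them based on the dropdown value. Below is a minimal standalone sketch of that loading-and-routing pattern; it is not part of the commit, the `sample.wav` path and the `build_pipeline` helper are hypothetical, and it assumes the `TOKEN` environment variable grants access to both checkpoints.

```python
# Minimal sketch (not part of the commit): load both checkpoints and
# transcribe a local wav file with whichever pipeline is selected.
import os

from transformers import (
    WhisperFeatureExtractor,
    WhisperForConditionalGeneration,
    WhisperTokenizer,
    pipeline,
)

auth_token = os.environ.get("TOKEN")  # assumed to be set, as in app.py

DISTILLED = "rohitp1/kkkh_whisper_small_distillation_att_loss_libri360_epochs_100_batch_4_concat_dataset"
FINETUNED = "rohitp1/dgx2_whisper_small_finetune_teacher_babble_noise_libri_360_hours_50_epochs_batch_8"


def build_pipeline(checkpoint):
    # Mirror the loading pattern in app.py: model, tokenizer and feature
    # extractor all come from the same checkpoint.
    model = WhisperForConditionalGeneration.from_pretrained(checkpoint, use_auth_token=auth_token)
    tokenizer = WhisperTokenizer.from_pretrained(checkpoint, use_auth_token=auth_token)
    feat_ext = WhisperFeatureExtractor.from_pretrained(checkpoint, use_auth_token=auth_token)
    return pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=tokenizer,
        feature_extractor=feat_ext,
    )


p1 = build_pipeline(DISTILLED)   # "RobustDistillation" option in the UI
p2 = build_pipeline(FINETUNED)   # "Finetuned" option in the UI


def transcribe_file(path, model_type="RobustDistillation"):
    # Same routing rule as the updated transcribe() in app.py.
    p = p2 if model_type == "Finetuned" else p1
    return p(path)["text"]


if __name__ == "__main__":
    print(transcribe_file("sample.wav"))  # hypothetical audio file
```

Note also that `gr.inputs.Dropdown` is the legacy Gradio namespace; on Gradio 3.x and later the equivalent would be `gr.Dropdown(choices=["RobustDistillation", "Finetuned"], value="RobustDistillation", label="Model Type")`, whose default value would then match what `clear_inputs_and_outputs()` restores.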