Baghdad99 committed on
Commit
382ed84
·
1 Parent(s): 3369603

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTextToWaveform
3
- import torch # Add the import statement for torch
4
 
5
  # Load your pretrained models
6
  asr_model = Wav2Vec2ForCTC.from_pretrained("Baghdad99/saad-speech-recognition-hausa-audio-to-text")
@@ -25,7 +25,7 @@ def translate_speech(speech):
25
  # Transcribe the speech to text
26
  inputs = asr_processor(audio_signal, return_tensors="pt", padding=True)
27
  logits = asr_model(inputs.input_values).logits
28
- predicted_ids = torch.argmax(logits, dim=-1) # Add torch module to access argmax function
29
  transcription = asr_processor.decode(predicted_ids[0])
30
 
31
  # Translate the text
@@ -40,5 +40,5 @@ def translate_speech(speech):
40
 
41
 
42
  # Define the Gradio interface
43
- iface = gr.Interface(fn=translate_speech, inputs=gr.inputs.Audio(source="microphone"), outputs="audio")
44
  iface.launch()
 
1
  import gradio as gr
2
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTextToWaveform
3
+ import torch
4
 
5
  # Load your pretrained models
6
  asr_model = Wav2Vec2ForCTC.from_pretrained("Baghdad99/saad-speech-recognition-hausa-audio-to-text")
 
25
  # Transcribe the speech to text
26
  inputs = asr_processor(audio_signal, return_tensors="pt", padding=True)
27
  logits = asr_model(inputs.input_values).logits
28
+ predicted_ids = torch.argmax(logits, dim=-1)
29
  transcription = asr_processor.decode(predicted_ids[0])
30
 
31
  # Translate the text
 
40
 
41
 
42
  # Define the Gradio interface
43
+ iface = gr.Interface(fn=translate_speech, inputs=gr.inputs.Audio(source="microphone", type="numpy"), outputs="audio")
44
  iface.launch()