akadriu committed on
Commit
0d320bd
·
verified ·
1 Parent(s): bca79ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -1,12 +1,12 @@
1
- from transformers import pipeline
 
2
  import gradio as gr
3
  import librosa
4
- import numpy as np
5
- import os
6
-
7
- from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
8
 
 
9
  hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
 
 
10
  processor = WhisperProcessor.from_pretrained("akadriu/whisper-medium-sq", token=hf_token)
11
  model = WhisperForConditionalGeneration.from_pretrained("akadriu/whisper-medium-sq", token=hf_token)
12
 
@@ -15,15 +15,16 @@ def transcribe(audio):
15
  input_features = processor(audio_input, sampling_rate=16000, return_tensors="pt").input_features
16
  predicted_ids = model.generate(input_features)
17
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
18
- text = transcription
19
  return text
20
 
 
21
  iface = gr.Interface(
22
  fn=transcribe,
23
- inputs=gr.Audio(source="microphone", type="filepath", label="Record your voice"),
24
  outputs="text",
25
  title="Whisper Medium Shqip",
26
  description="Realtime demo for Sq speech recognition using a fine-tuned Whisper medium model.",
27
  )
28
 
29
- iface.launch(share=True)
 
1
+ import os
2
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
  import gradio as gr
4
  import librosa
 
 
 
 
5
 
6
+ # Fetch the token from the environment
7
  hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
8
+
9
+ # Load the processor and model using the token for authentication
10
  processor = WhisperProcessor.from_pretrained("akadriu/whisper-medium-sq", token=hf_token)
11
  model = WhisperForConditionalGeneration.from_pretrained("akadriu/whisper-medium-sq", token=hf_token)
12
 
 
15
  input_features = processor(audio_input, sampling_rate=16000, return_tensors="pt").input_features
16
  predicted_ids = model.generate(input_features)
17
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
18
+ text = transcription[0] # Decode returns a list
19
  return text
20
 
21
+ # Updated interface without the 'source' argument
22
  iface = gr.Interface(
23
  fn=transcribe,
24
+ inputs=gr.Audio(type="filepath", label="Record your voice"), # Removed 'source' argument
25
  outputs="text",
26
  title="Whisper Medium Shqip",
27
  description="Realtime demo for Sq speech recognition using a fine-tuned Whisper medium model.",
28
  )
29
 
30
+ iface.launch(share=True)