antfraia committed on
Commit
2cdb5e3
·
1 Parent(s): 35c0394

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -8
app.py CHANGED
@@ -1,16 +1,27 @@
1
  import gradio as gr
 
2
 
3
- # Load the model without launching the interface
4
- loaded_model = gr.Interface.load("models/openai/whisper-large-v2", allow_launch=False)
 
5
 
6
- def transcribe_audio(audio_file):
7
- # Use the loaded model to transcribe the audio
8
- return loaded_model(audio_file)
 
 
 
9
 
10
- audio_input = gr.inputs.Audio(type="filepath")
11
- text_output = gr.outputs.Textbox()
 
 
 
 
 
 
 
12
 
13
- # Setup the custom Gradio interface with your configurations
14
  iface = gr.Interface(
15
  fn=transcribe_audio,
16
  inputs=audio_input,
 
1
  import gradio as gr
2
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
 
4
+ # Load model and processor
5
+ processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
6
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
7
 
8
+ def transcribe_audio(audio_path: str) -> str:
9
+ with open(audio_path, "rb") as f:
10
+ audio_data = f.read()
11
+
12
+ # Get audio features
13
+ input_features = processor(audio_data, return_tensors="pt").input_features
14
 
15
+ # Transcribe without forcing any context tokens so that the model tries to automatically detect the language
16
+ model.config.forced_decoder_ids = None
17
+ predicted_ids = model.generate(input_features)
18
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
19
+
20
+ return transcription[0]
21
+
22
+ audio_input = gr.inputs.Audio(type="file", label="Upload an audio file")
23
+ text_output = gr.outputs.Textbox(label="Transcription")
24
 
 
25
  iface = gr.Interface(
26
  fn=transcribe_audio,
27
  inputs=audio_input,