DarwinAnim8or committed on
Commit
3528c71
·
1 Parent(s): ce40abd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -12
app.py CHANGED
@@ -1,20 +1,43 @@
1
  import gradio as gr
2
- import whisper
3
 
4
- def transcribe_audio(audio_file):
5
- model = whisper.load_model("base")
6
- result = model.transcribe(audio_file)
7
- return result["text"]
8
 
9
- audio_input = gr.inputs.Audio(source="upload", type="file")
10
- output_text = gr.outputs.Textbox()
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  iface = gr.Interface(
13
- fn=transcribe_audio,
14
- inputs=audio_input,
15
- outputs=output_text,
16
- title="Audio Transcription App",
17
- description="Upload an audio file or record in real-time and hit the 'Submit' button"
 
 
 
 
 
 
 
18
  )
19
 
 
20
  iface.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, WhisperForConditionalGeneration, WhisperProcessor
3
 
4
+ # Load the model and processor
5
+ model_id = "openai/whisper-medium"
6
+ processor = WhisperProcessor(model_id)
 
7
 
8
+ model = WhisperModel.from_pretrained(model_id)
9
+ model.config.forced_decoder_ids = None
10
 
11
+ # Define a function that takes an audio input and returns a transcription
12
+ def transcribe(audio):
13
+ # Use the processor to transcribe the audio
14
+ transcription = processor.transcribe(audio)
15
+
16
+ # Extract the confidence score and the duration from the transcription
17
+ confidence = transcription.confidence
18
+ duration = transcription.duration
19
+
20
+ # Remove the special tokens from the transcription text
21
+ text = transcription.text.replace("<|startoftranscript|>", "").replace("<|endoftranscript|>", "")
22
+
23
+ # Return the text, confidence and duration as outputs
24
+ return text, confidence, duration
25
+
26
+ # Create a Gradio interface with two modes: realtime and file upload
27
  iface = gr.Interface(
28
+ fn=transcribe,
29
+ inputs=[
30
+ gr.inputs.Audio(source="microphone", type="numpy", label="Realtime Mode"),
31
+ gr.inputs.Audio(source="upload", type="numpy", label="File Upload Mode")
32
+ ],
33
+ outputs=[
34
+ gr.outputs.Textbox(label="Transcription"),
35
+ gr.outputs.Textbox(label="Confidence Score"),
36
+ gr.outputs.Textbox(label="Duration (seconds)")
37
+ ],
38
+ title="Whisper Transcription App",
39
+ description="A Gradio app that uses OpenAI's whisper model to transcribe audio"
40
  )
41
 
42
+ # Launch the app
43
  iface.launch()