DarwinAnim8or committed on
Commit
e1e2511
·
1 Parent(s): 34d8803

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -1
app.py CHANGED
@@ -1,3 +1,45 @@
1
  import gradio as gr
 
 
2
 
3
- gr.Interface.load("models/openai/whisper-medium").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio app that transcribes speech with OpenAI's Whisper model."""

import math

import gradio as gr
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Whisper's feature extractor expects 16 kHz mono audio.
TARGET_SAMPLE_RATE = 16_000

# Load the processor (feature extractor + tokenizer) and the model once at
# startup so every request reuses them.
model_id = "openai/whisper-medium"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)
model.eval()


def _to_mono_float(samples):
    """Return *samples* as a 1-D float32 array.

    Integer PCM data is scaled by the maximum value of its dtype; arrays with
    more than one dimension are averaged over axis 1 (assumed to be the
    channel axis, as delivered by Gradio's numpy audio type).
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    return samples


def _resample(samples, sample_rate):
    """Linearly resample 1-D *samples* from *sample_rate* to 16 kHz.

    Linear interpolation keeps the app free of extra dependencies; it applies
    no anti-aliasing filter, so swap in a proper resampler if quality matters.
    """
    if sample_rate == TARGET_SAMPLE_RATE or samples.size == 0:
        return samples
    target_len = max(1, int(round(samples.size / sample_rate * TARGET_SAMPLE_RATE)))
    source_times = np.arange(samples.size) / sample_rate
    target_times = np.arange(target_len) / TARGET_SAMPLE_RATE
    return np.interp(target_times, source_times, samples).astype(np.float32)


def transcribe(audio, uploaded_audio=None):
    """Transcribe an audio clip and report a confidence score and duration.

    Args:
        audio: ``(sample_rate, samples)`` tuple from the microphone input, or
            ``None`` when nothing was recorded.
        uploaded_audio: ``(sample_rate, samples)`` tuple from the upload
            input, used only when ``audio`` is ``None``.

    Returns:
        A ``(text, confidence, duration)`` tuple: the decoded transcription,
        the geometric mean of the per-token probabilities of the greedy
        decode (0.0 when nothing was generated), and the clip length in
        seconds. Returns ``("", 0.0, 0.0)`` when no audio was supplied.

    Note:
        A single call to the model covers one 30-second window; longer clips
        are truncated by the feature extractor.
    """
    # The interface exposes two audio inputs; use whichever one was provided.
    clip = audio if audio is not None else uploaded_audio
    if clip is None:
        return "", 0.0, 0.0

    sample_rate, samples = clip
    samples = _to_mono_float(samples)
    if samples.size == 0:
        return "", 0.0, 0.0
    duration = samples.size / sample_rate

    features = processor(
        _resample(samples, sample_rate),
        sampling_rate=TARGET_SAMPLE_RATE,
        return_tensors="pt",
    ).input_features

    with torch.no_grad():
        generated = model.generate(
            features,
            do_sample=False,
            num_beams=1,
            output_scores=True,
            return_dict_in_generate=True,
        )

    # Let the tokenizer drop special tokens instead of string-replacing them.
    text = processor.batch_decode(
        generated["sequences"], skip_special_tokens=True
    )[0].strip()

    # Under greedy decoding the emitted token is the arg-max of each step, so
    # the max log-probability per step is the log-probability of that token.
    step_log_probs = [
        torch.log_softmax(step[0].float(), dim=-1).max().item()
        for step in generated["scores"]
    ]
    if step_log_probs:
        confidence = math.exp(sum(step_log_probs) / len(step_log_probs))
    else:
        confidence = 0.0

    return text, round(confidence, 3), round(duration, 2)


# Create a Gradio interface with two modes: realtime and file upload.
# NOTE(review): `gr.inputs` / `gr.outputs` are kept from the original commit;
# newer Gradio releases expose these components as `gr.Audio` / `gr.Textbox`
# with a different signature -- confirm against the pinned Gradio version.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="numpy", label="Realtime Mode", optional=True),
        gr.inputs.Audio(source="upload", type="numpy", label="File Upload Mode", optional=True),
    ],
    outputs=[
        gr.outputs.Textbox(label="Transcription"),
        gr.outputs.Textbox(label="Confidence Score"),
        gr.outputs.Textbox(label="Duration (seconds)"),
    ],
    title="Whisper Transcription App",
    description="A Gradio app that uses OpenAI's whisper model to transcribe audio",
)

# Launch the app
iface.launch()