DarwinAnim8or committed
Commit ce40abd · 1 Parent(s): 8ac09b0

Update app.py

Files changed (1)
  1. app.py +12 -36
app.py CHANGED
@@ -1,44 +1,20 @@
 import gradio as gr
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, WhisperModel, WhisperProcessor
+import whisper
 
-# Load the model and tokenizer
-model_id = "openai/whisper-medium"
-model = WhisperModel.from_pretrained(model_id)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+def transcribe_audio(audio_file):
+    model = whisper.load_model("base")
+    result = model.transcribe(audio_file)
+    return result["text"]
 
-# Create a WhisperProcessor instance
-processor = WhisperProcessor(model=model, tokenizer=tokenizer)
+audio_input = gr.inputs.Audio(source="upload", type="file")
+output_text = gr.outputs.Textbox()
 
-# Define a function that takes an audio input and returns a transcription
-def transcribe(audio):
-    # Use the processor to transcribe the audio
-    transcription = processor.transcribe(audio)
-
-    # Extract the confidence score and the duration from the transcription
-    confidence = transcription.confidence
-    duration = transcription.duration
-
-    # Remove the special tokens from the transcription text
-    text = transcription.text.replace("<|startoftranscript|>", "").replace("<|endoftranscript|>", "")
-
-    # Return the text, confidence and duration as outputs
-    return text, confidence, duration
-
-# Create a Gradio interface with two modes: realtime and file upload
 iface = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.inputs.Audio(source="microphone", type="numpy", label="Realtime Mode"),
-        gr.inputs.Audio(source="upload", type="numpy", label="File Upload Mode")
-    ],
-    outputs=[
-        gr.outputs.Textbox(label="Transcription"),
-        gr.outputs.Textbox(label="Confidence Score"),
-        gr.outputs.Textbox(label="Duration (seconds)")
-    ],
-    title="Whisper Transcription App",
-    description="A Gradio app that uses OpenAI's whisper model to transcribe audio"
+    fn=transcribe_audio,
+    inputs=audio_input,
+    outputs=output_text,
+    title="Audio Transcription App",
+    description="Upload an audio file or record in real-time and hit the 'Submit' button"
 )
 
-# Launch the app
 iface.launch()
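
Note: the app.py committed here still targets the legacy gr.inputs / gr.outputs API, which later Gradio releases deprecated and Gradio 4 removed. A minimal sketch of the same app against a current Gradio 4.x install might look like the code below; the sources list, the type="filepath" setting, and loading the model once at startup are assumptions about the target environment, not part of this commit, and the openai-whisper package plus ffmpeg are assumed to be installed.

import gradio as gr
import whisper  # pip install -U openai-whisper; ffmpeg must be on PATH

# Load the checkpoint once at startup rather than on every request.
model = whisper.load_model("base")

def transcribe_audio(audio_path):
    # With type="filepath", Gradio hands the handler a path string,
    # which whisper's transcribe() accepts directly.
    if audio_path is None:
        return ""
    result = model.transcribe(audio_path)
    return result["text"]

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio"),
    outputs=gr.Textbox(label="Transcription"),
    title="Audio Transcription App",
    description="Upload an audio file or record in real-time and hit the 'Submit' button",
)

iface.launch()

On a Hugging Face Space this would typically mean listing gradio and openai-whisper in requirements.txt and ffmpeg in packages.txt.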