josh-salako committed on
Commit
99740fe
·
verified ·
1 Parent(s): d400261

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
4
+
5
# Use the first CUDA GPU with float16 weights when CUDA is available;
# otherwise run on the CPU with float32 weights.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

model_id = "openai/whisper-large-v3"

# Load the speech-to-text model in the selected dtype and move it onto the
# selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
)
model.to(device)

# The processor supplies the tokenizer and feature extractor that the
# pipeline below is wired up with.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline used by the transcription callback.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
21
+
22
def transcribe(audio):
    """Transcribe a recorded audio clip to text with the Whisper pipeline.

    Args:
        audio: The audio input forwarded by the UI (a file path, since the
            audio component is configured with ``type="filepath"``), or
            ``None`` when there is no recording.

    Returns:
        The recognized text, or an empty string when no audio was supplied.
    """
    # The interface runs in live mode, so this callback can be invoked
    # before anything has been recorded or after the recording is cleared.
    # Passing ``None`` on to the pipeline would raise, so return early.
    if audio is None:
        return ""
    return pipe(audio)["text"]
25
+
26
# Microphone input; the recording is handed to the callback as a file path.
# NOTE(review): `source=` looks like the Gradio 3.x keyword; newer Gradio
# releases appear to expect `sources=[...]` - confirm against the pinned
# Gradio version.
_mic_input = gr.Audio(source="microphone", type="filepath")

# Live interface that sends the microphone input to `transcribe` and shows
# the returned text.
iface = gr.Interface(
    fn=transcribe,
    inputs=_mic_input,
    outputs="text",
    live=True,
    title="Speech-to-Text Demo",
    description="Transcribe speech to text using the Whisper model.",
)

# Launch the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()