Utpal21022102 committed on
Commit
cbaccb8
·
verified ·
1 Parent(s): eabd856

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
4
+ import gradio as gr
5
+ import time
6
+
7
# Hugging Face Hub identifier of the speech-to-text checkpoint served by this app.
model_id = "sanket003/whisper-darpg"

# Load the sequence-to-sequence speech model in float32 from safetensors weights.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=False,
    use_safetensors=True,
)

# The processor supplies the tokenizer and feature extractor handed to the
# pipeline below.
processor = AutoProcessor.from_pretrained(model_id)

# Shared automatic-speech-recognition pipeline, built once at import time and
# reused for every request.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch.float32,
    # Generation is configured with the "translate" task and English as the
    # language, so the returned text is expected to be English.
    generate_kwargs={"language": "english", "task": "translate"},
    # Ask the pipeline to return timestamps alongside the text.
    return_timestamps=True,
)
21
+
22
def transcribe_audio(audio, file):
    """Transcribe whichever audio input the user supplied.

    The microphone recording takes precedence: the uploaded file is only
    used when no recording is present.

    Args:
        audio: Microphone recording passed in by the UI, or a falsy value
            (``None`` / empty) when nothing was recorded.
        file: Uploaded audio file passed in by the UI, or a falsy value
            when nothing was uploaded.

    Returns:
        The ``"text"`` field of the pipeline result, or the message
        ``"No input provided."`` when both inputs are empty.
    """
    # Prefer the live recording, then fall back to the uploaded file.
    source = audio or file
    if not source:
        # Nothing to transcribe; skip the model call entirely.
        return "No input provided."

    result = pipe(source)
    return result["text"]
31
+
32
# Gradio UI: two alternative inputs (microphone recording or uploaded file)
# feeding transcribe_audio, with the resulting text shown in a textbox.
iface = gr.Interface(
    title="Transforming Speech into Text",
    fn=transcribe_audio,
    inputs=[
        # Both components are configured with type="filepath".
        gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Record from Microphone",
        ),
        gr.File(type="filepath", label="Upload Audio File"),
    ],
    outputs=["textbox"],
    description="Choose either microphone input or upload an audio file.",
)

# Start the app with a public share link requested and debug mode enabled.
iface.launch(share=True, debug=True)