UtpaL2102 committed on
Commit
ea23c90
·
1 Parent(s): ca6e3e7

Add application file

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
+ import gradio as gr
4
+
5
# Hub checkpoint that supplies both the model weights and the processor.
model_id = "sanket003/whisper-darpg"

# Load the speech seq2seq weights in float32 from safetensors files.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=False,
    use_safetensors=True,
)

# The processor exposes the tokenizer and feature extractor used below.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline reused by every transcription request.
# Generation is asked for English output and timestamps are requested.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch.float32,
    generate_kwargs={"language": "english"},
    return_timestamps=True,
)
22
+
23
+ # Define the Gradio interface function
24
def transcribe_audio(audio, file):
    """Transcribe the microphone recording or, failing that, the uploaded file.

    Args:
        audio: Value from the microphone input; used whenever it is truthy.
        file: Value from the file-upload input; used only when ``audio`` is
            falsy.

    Returns:
        The ``"text"`` entry of the pipeline result, or a fixed notice when
        neither input is provided.
    """
    # The microphone takes precedence; the upload is the fallback.
    source = audio or file
    if not source:
        return "No input provided."
    return pipe(source)["text"]
32
+
33
# Gradio interface: a microphone recorder and a file-upload widget, both
# configured with type="filepath", feeding transcribe_audio; the result is
# shown in a single textbox.
iface = gr.Interface(
    title="Transforming Speech into Text",
    fn=transcribe_audio,
    inputs=[
        # Gradio 4.x replaced Audio's `source=` string with a `sources=` list.
        # File(type="filepath") below is also a 4.x-only value, so the old
        # `source=` keyword could not work alongside it.
        gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone"),
        gr.File(type="filepath", label="Upload Audio File"),
    ],
    outputs=["textbox"],
    description="Choose either microphone input or upload an audio file.",
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch(share=True, debug=True)