Pranjal12345 committed on
Commit
e932a3b
·
1 Parent(s): 531e098
Files changed (3) hide show
  1. app.py +75 -0
  2. packages.txt +1 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+ #from transformers.pipelines.audio_utils import ffmpeg_read
6
+
7
+ # import tempfile
8
+ # import os
9
+
10
# Checkpoint loaded by the speech-recognition pipeline below; also shown in
# the UI description.
MODEL_NAME = "openai/whisper-large-v2"
# Batch size handed to the pipeline on every transcription call.
BATCH_SIZE = 8
# NOTE(review): not referenced anywhere in the visible part of this file.
FILE_LIMIT_MB = 1000

# Default to the CPU and switch to CUDA device index 0 when a GPU is present.
device = "cpu"
if torch.cuda.is_available():
    device = 0

# Built once at import time so every request reuses the same loaded model.
# Audio is processed in 30-second chunks (chunk_length_s=30).
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
22
+
23
+
24
def transcribe(inputs, task):
    """Run the module-level ASR pipeline on one audio input and return its text.

    Args:
        inputs: Value delivered by the Gradio audio component, or ``None``
            when nothing was uploaded or recorded. The audio components in
            this file use ``type="filepath"``, so this is presumably a path.
        task: Forwarded to generation as ``generate_kwargs={"task": task}``;
            the UI offers ``"transcribe"`` and ``"translate"``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # Timestamps are requested from the pipeline, but only the text is returned.
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]
30
+
31
+
32
demo = gr.Blocks()


def _build_interface(audio_input):
    """Return a transcription ``gr.Interface`` that reads from ``audio_input``.

    The microphone and file-upload tabs differ only in their audio component,
    so every other setting (task selector, layout, theme, title, description,
    flagging) is defined once here instead of being copied per tab.

    Args:
        audio_input: An already-constructed Gradio audio input component.

    Returns:
        A ``gr.Interface`` that calls ``transcribe`` with the audio input and
        the selected task, and renders the result as text.
    """
    return gr.Interface(
        fn=transcribe,
        inputs=[
            audio_input,
            gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
        ],
        outputs="text",
        layout="horizontal",
        theme="huggingface",
        title="Whisper Large V2: Transcribe Audio",
        description=(
            "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
            f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
            " of arbitrary length."
        ),
        allow_flagging="never",
    )


# Tab 1: record from the microphone.
mf_transcribe = _build_interface(
    gr.inputs.Audio(source="microphone", type="filepath", optional=True),
)

# Tab 2: upload an existing audio file.
file_transcribe = _build_interface(
    gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
)
69
+
70
+
71
# Place both interfaces inside the Blocks container, one tab each.
with demo:
    gr.TabbedInterface(
        interface_list=[mf_transcribe, file_transcribe],
        tab_names=["Microphone", "Audio file"],
    )

# Start the app with request queuing enabled.
demo.launch(enable_queue=True)
75
+
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/transformers
2
+ torch
3
+ yt-dlp