riteshkr committed on
Commit
85743c2
·
verified ·
1 Parent(s): ac47fc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -58
app.py CHANGED
@@ -8,6 +8,7 @@ BATCH_SIZE = 8
8
 
9
  device = 0 if torch.cuda.is_available() else "cpu"
10
 
 
11
  pipe = pipeline(
12
  task="automatic-speech-recognition",
13
  model=MODEL_NAME,
@@ -15,28 +16,20 @@ pipe = pipeline(
15
  device=device,
16
  )
17
 
18
-
19
- # Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
20
  def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
21
  if seconds is not None:
22
  milliseconds = round(seconds * 1000.0)
23
-
24
  hours = milliseconds // 3_600_000
25
  milliseconds -= hours * 3_600_000
26
-
27
  minutes = milliseconds // 60_000
28
  milliseconds -= minutes * 60_000
29
-
30
  seconds = milliseconds // 1_000
31
  milliseconds -= seconds * 1_000
32
-
33
  hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
34
  return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
35
  else:
36
- # we have a malformed timestamp so just return it as is
37
  return seconds
38
 
39
-
40
  def transcribe(file, task, return_timestamps):
41
  outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
42
  text = outputs["text"]
@@ -49,53 +42,35 @@ def transcribe(file, task, return_timestamps):
49
  text = "\n".join(str(feature) for feature in timestamps)
50
  return text
51
 
52
-
53
- demo = gr.Blocks()
54
-
55
- mic_transcribe = gr.Interface(
56
- fn=transcribe,
57
- inputs=[
58
- gr.inputs.Audio(source="microphone", type="filepath", optional=True),
59
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
60
- gr.inputs.Checkbox(default=False, label="Return timestamps"),
61
- ],
62
- outputs="text",
63
- layout="horizontal",
64
- theme="huggingface",
65
- title="Whisper Demo: Transcribe Audio",
66
- description=(
67
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
68
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
69
- " of arbitrary length."
70
- ),
71
- allow_flagging="never",
72
- )
73
-
74
- file_transcribe = gr.Interface(
75
- fn=transcribe,
76
- inputs=[
77
- gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
78
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
79
- gr.inputs.Checkbox(default=False, label="Return timestamps"),
80
- ],
81
- outputs="text",
82
- layout="horizontal",
83
- theme="huggingface",
84
- title="Whisper Demo: Transcribe Audio",
85
- description=(
86
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
87
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
88
- " of arbitrary length."
89
- ),
90
- examples=[
91
- ["./example.flac", "transcribe", False],
92
- ["./example.flac", "transcribe", True],
93
- ],
94
- cache_examples=True,
95
- allow_flagging="never",
96
- )
97
-
98
- with demo:
99
- gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
100
-
101
- demo.launch(enable_queue=True)
 
8
 
9
  device = 0 if torch.cuda.is_available() else "cpu"
10
 
11
+ # Initialize the pipeline
12
  pipe = pipeline(
13
  task="automatic-speech-recognition",
14
  model=MODEL_NAME,
 
16
  device=device,
17
  )
18
 
 
 
19
  def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
20
  if seconds is not None:
21
  milliseconds = round(seconds * 1000.0)
 
22
  hours = milliseconds // 3_600_000
23
  milliseconds -= hours * 3_600_000
 
24
  minutes = milliseconds // 60_000
25
  milliseconds -= minutes * 60_000
 
26
  seconds = milliseconds // 1_000
27
  milliseconds -= seconds * 1_000
 
28
  hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
29
  return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
30
  else:
 
31
  return seconds
32
 
 
33
  def transcribe(file, task, return_timestamps):
34
  outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
35
  text = outputs["text"]
 
42
  text = "\n".join(str(feature) for feature in timestamps)
43
  return text
44
 
45
# Build the UI with Blocks: one tab for microphone input, one for file upload.
with gr.Blocks() as demo:
    # gr.Tabs is the layout container for gr.TabItem children.
    # gr.TabbedInterface expects a list of prebuilt gr.Interface objects and
    # cannot be used as a context manager around tab names.
    with gr.Tabs():
        with gr.TabItem("Transcribe Microphone"):
            # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4
            # renamed it to `sources=[...]` -- confirm against the pinned version.
            mic_audio = gr.Audio(source="microphone", type="filepath", label="Record Speech")
            mic_task = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
            mic_return_timestamps = gr.Checkbox(label="Return timestamps")
            mic_output = gr.Textbox(label="Transcription")
            mic_button = gr.Button("Transcribe")

            mic_button.click(
                fn=transcribe,
                inputs=[mic_audio, mic_task, mic_return_timestamps],
                outputs=mic_output,
            )

        with gr.TabItem("Transcribe Audio File"):
            file_audio = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
            file_task = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
            file_return_timestamps = gr.Checkbox(label="Return timestamps")
            file_output = gr.Textbox(label="Transcription")
            file_button = gr.Button("Transcribe")

            file_button.click(
                fn=transcribe,
                inputs=[file_audio, file_task, file_return_timestamps],
                outputs=file_output,
            )

# Enable request queuing via Blocks.queue(); the `enable_queue` argument to
# launch() is deprecated.
demo.queue().launch()