Hemg committed on
Commit
c394a78
·
verified ·
1 Parent(s): 5149cfb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -91
app.py CHANGED
@@ -1,92 +1,8 @@
1
- # import torch
2
- # from transformers import pipeline
3
- # import gradio as gr
4
-
5
- # MODEL_NAME = "Hemg/ASRr"
6
- # BATCH_SIZE = 8
7
-
8
- # device = 0 if torch.cuda.is_available() else "cpu"
9
-
10
- # pipe = pipeline(
11
- # task="automatic-speech-recognition",
12
- # model=MODEL_NAME,
13
- # chunk_length_s=30,
14
- # device=device,
15
- # return_timestamps='word'
16
- # )
17
-
18
-
19
- # # Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
20
- # def format_timestamp(
21
- # seconds: float, always_include_hours: bool = False, decimal_marker: str = "."
22
- # ):
23
- # if seconds is not None:
24
- # milliseconds = round(seconds * 1000.0)
25
-
26
- # hours = milliseconds // 3_600_000
27
- # milliseconds -= hours * 3_600_000
28
-
29
- # minutes = milliseconds // 60_000
30
- # milliseconds -= minutes * 60_000
31
-
32
- # seconds = milliseconds // 1_000
33
- # milliseconds -= seconds * 1_000
34
-
35
- # hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
36
- # return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
37
- # else:
38
- # # we have a malformed timestamp so just return it as is
39
- # return seconds
40
-
41
-
42
- # def transcribe(file, return_timestamps):
43
- # outputs = pipe(
44
- # file,
45
- # batch_size=BATCH_SIZE,
46
- # return_timestamps=return_timestamps,
47
- # )
48
- # text = outputs["text"]
49
- # if return_timestamps:
50
- # timestamps = outputs["chunks"]
51
- # timestamps = [
52
- # f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
53
- # for chunk in timestamps
54
- # ]
55
- # text = "\n".join(str(feature) for feature in timestamps)
56
- # return text
57
-
58
-
59
- # demo = gr.Interface(
60
- # fn=transcribe,
61
- # inputs=[
62
- # #gr.Audio(label="Audio", type="filepath"),
63
- # gr.Audio(sources=["upload", "microphone"], type="filepath"),
64
- # gr.Checkbox(label="Return timestamps"),
65
- # ],
66
- # outputs=gr.Textbox(show_copy_button=True, label="Text"),
67
- # title="Automatic Speech Recognition",
68
- # examples=[
69
- # ["examples/example.wav", False],
70
- # ["examples/example.wav", True],
71
- # ],
72
- # cache_examples=True,
73
- # allow_flagging="never",
74
- # )
75
-
76
- # demo.launch()
77
-
78
-
79
-
80
-
81
-
82
-
83
-
84
-
85
  import torch
86
  from transformers import pipeline
87
  import gradio as gr
88
 
89
- MODEL_NAME = "JackismyShephard/whisper-tiny-finetuned-minds14"
90
  BATCH_SIZE = 8
91
 
92
  device = 0 if torch.cuda.is_available() else "cpu"
@@ -94,27 +10,64 @@ device = 0 if torch.cuda.is_available() else "cpu"
94
  pipe = pipeline(
95
  task="automatic-speech-recognition",
96
  model=MODEL_NAME,
 
97
  device=device,
 
98
  )
99
 
100
 
101
- def transcribe(file):
102
- outputs = pipe(file, batch_size=BATCH_SIZE)
103
- text = " ".join([output['transcription'] for output in outputs])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  return text
105
 
106
 
107
-
108
-
109
  demo = gr.Interface(
110
  fn=transcribe,
111
  inputs=[
 
112
  gr.Audio(sources=["upload", "microphone"], type="filepath"),
 
113
  ],
114
  outputs=gr.Textbox(show_copy_button=True, label="Text"),
115
  title="Automatic Speech Recognition",
116
  examples=[
117
- ["examples/example.wav"],
 
118
  ],
119
  cache_examples=True,
120
  allow_flagging="never",
@@ -122,3 +75,10 @@ demo = gr.Interface(
122
 
123
  demo.launch()
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
  from transformers import pipeline
3
  import gradio as gr
4
 
5
+ MODEL_NAME = "Hemg/ASRr"
6
  BATCH_SIZE = 8
7
 
8
  device = 0 if torch.cuda.is_available() else "cpu"
 
10
  pipe = pipeline(
11
  task="automatic-speech-recognition",
12
  model=MODEL_NAME,
13
+ chunk_length_s=30,
14
  device=device,
15
+ return_timestamps='word'
16
  )
17
 
18
 
19
# Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
def format_timestamp(
    seconds: float, always_include_hours: bool = False, decimal_marker: str = "."
):
    """Render a time offset in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Offset in seconds. ``None`` is tolerated and passed through
            unchanged so a malformed/missing timestamp does not raise here.
        always_include_hours: When true, emit the ``HH:`` prefix even if the
            hour component is zero.
        decimal_marker: Separator placed between whole seconds and the
            three-digit millisecond field.

    Returns:
        The formatted string, or ``None`` when ``seconds`` is ``None``.
    """
    if seconds is None:
        # We have a malformed timestamp, so just return it as is.
        return seconds

    # Work in integer milliseconds so the successive splits below are exact.
    milliseconds = round(seconds * 1000.0)

    hours, milliseconds = divmod(milliseconds, 3_600_000)
    minutes, milliseconds = divmod(milliseconds, 60_000)
    secs, milliseconds = divmod(milliseconds, 1_000)

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{secs:02d}{decimal_marker}{milliseconds:03d}"
40
+
41
+
42
def transcribe(file, return_timestamps):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        file: Path to the audio file, as supplied by the ``gr.Audio``
            component configured with ``type="filepath"``.
        return_timestamps: Forwarded to the pipeline; when truthy, the result
            is rendered as one ``[start -> end] text`` line per chunk instead
            of the plain transcript.

    Returns:
        The transcript text, or the newline-joined timestamped chunks.

    Raises:
        gr.Error: If no audio was submitted.
    """
    if file is None:
        # NOTE(review): the audio component is expected to hand over None when
        # the user submits without a recording/upload; surface a readable
        # message rather than letting the pipeline fail on it.
        raise gr.Error(
            "No audio file submitted. Please upload or record an audio file "
            "before submitting."
        )

    outputs = pipe(
        file,
        batch_size=BATCH_SIZE,
        return_timestamps=return_timestamps,
    )
    if not return_timestamps:
        return outputs["text"]

    # Each chunk carries a (start, end) pair under "timestamp" and its text.
    lines = [
        f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
        for chunk in outputs["chunks"]
    ]
    return "\n".join(lines)
57
 
58
 
 
 
59
  demo = gr.Interface(
60
  fn=transcribe,
61
  inputs=[
62
+ #gr.Audio(label="Audio", type="filepath"),
63
  gr.Audio(sources=["upload", "microphone"], type="filepath"),
64
+ gr.Checkbox(label="Return timestamps"),
65
  ],
66
  outputs=gr.Textbox(show_copy_button=True, label="Text"),
67
  title="Automatic Speech Recognition",
68
  examples=[
69
+ ["examples/example.wav", False],
70
+ ["examples/example.wav", True],
71
  ],
72
  cache_examples=True,
73
  allow_flagging="never",
 
75
 
76
  demo.launch()
77
 
78
+
79
+
80
+
81
+
82
+
83
+
84
+