Tollef Jørgensen committed on
Commit
253c4ed
1 Parent(s): a8dc314

first test of nb small

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import numpy as np
4
+
5
# Hugging Face Hub id of the Whisper checkpoint used for transcription.
model_id = "NbAiLab/nb-whisper-small-beta"

# Speech-recognition pipeline, built once at import time and reused by
# every call to transcribe().
transcriber = pipeline(task="automatic-speech-recognition", model=model_id)

# Running time offset (seconds) that make_timestamp() adds to every
# timestamp; transcribe() advances it.
total_time = 0

# Running SRT entry counter, updated by transcribe().
counter = 0
9
+
10
+
11
def make_timestamp(ref):
    """Format a time offset as an SRT timestamp (``HH:MM:SS,mmm``).

    The module-level ``total_time`` offset is added to ``ref`` before
    formatting.

    Args:
        ref: Offset in seconds (int or float).

    Returns:
        The timestamp string, e.g. ``"00:01:02,500"`` for 62.5 seconds.
    """
    # Convert to integer milliseconds once, so the millisecond field is
    # the fractional part of the second (not "seconds modulo 1000") and
    # rounding carries cleanly into seconds/minutes/hours.
    total_ms = int(round((total_time + ref) * 1000))
    whole_seconds, mmm = divmod(total_ms, 1000)
    whole_minutes, ss = divmod(whole_seconds, 60)
    hh, mm = divmod(whole_minutes, 60)
    return f"{hh:02d}:{mm:02d}:{ss:02d},{mmm:03d}"
18
+
19
+
20
def transcribe(audio):
    """Transcribe one recording and return its text as SRT entries.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by the
            ``gr.Audio`` input, or ``None`` when nothing was recorded.

    Returns:
        The SRT entries for this recording joined by blank lines, or an
        empty string when there is no audio to transcribe.

    Side effects:
        Advances the module-level ``counter`` (one per emitted entry) and
        ``total_time`` (by the duration of this recording).
    """
    global counter
    global total_time

    # Nothing recorded / submitted.
    if audio is None:
        return ""

    sr, y = audio
    # Down-mix multi-channel input to mono before handing it to the model.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    if y.size == 0:
        return ""

    # Peak-normalise, but leave an all-zero (silent) clip untouched instead
    # of dividing by zero and filling it with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    duration = y.size / sr

    conf = {"sampling_rate": sr, "raw": y}
    kwargs = {"task": "transcribe", "language": "no"}
    res = transcriber(conf, generate_kwargs=kwargs, return_timestamps=True)

    entries = []
    for chunk in res["chunks"]:
        start, end = chunk["timestamp"]
        txt = chunk["text"].strip()
        # The pipeline may leave a boundary as None (e.g. the end of the
        # last chunk when the audio is cut off); fall back to the clip
        # edges so formatting does not fail.
        if start is None:
            start = 0.0
        if end is None:
            end = duration
        # SRT cue numbers start at 1, so bump the counter before using it.
        counter += 1
        start_srt = make_timestamp(start)
        end_srt = make_timestamp(end)
        entries.append(f"{counter}\n{start_srt} --> {end_srt}\n{txt}\n")

    # Chunk timestamps are relative to the start of this recording, so the
    # running offset must only move once the whole clip has been formatted.
    total_time += duration

    return "\n".join(entries)
44
+
45
+
46
+
47
# Web UI: microphone audio in, plain-text SRT entries out, with
# transcribe() as the handler.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone"),
    outputs="text",
)

demo.launch()
54
+