pere committed on
Commit
0f57ece
·
1 Parent(s): ecc7149
Files changed (1) hide show
  1. app.py +47 -43
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import time
2
  import os
 
3
 
4
  import torch
5
 
@@ -21,14 +22,14 @@ lang = "no"
21
  share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
22
  auth_token = os.environ.get("AUTH_TOKEN") or True
23
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24
- print(f"Using device: {device}")
25
 
26
  @spaces.GPU(duration=60 * 2)
27
  def pipe(file, return_timestamps=False):
28
  asr = pipeline(
29
  task="automatic-speech-recognition",
30
  model=MODEL_NAME,
31
- chunk_length_s=26,
32
  device=device,
33
  token=auth_token,
34
  torch_dtype=torch.float16,
@@ -41,9 +42,17 @@ def pipe(file, return_timestamps=False):
41
  )
42
  return asr(file, return_timestamps=return_timestamps, batch_size=24)
43
 
 
 
 
 
 
 
 
44
  def transcribe(file, return_timestamps=False):
45
  if not return_timestamps:
46
  text = pipe(file)["text"]
 
47
  else:
48
  chunks = pipe(file, return_timestamps=True)["chunks"]
49
  text = []
@@ -52,8 +61,8 @@ def transcribe(file, return_timestamps=False):
52
  end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
53
  line = f"[{start_time} -> {end_time}] {chunk['text']}"
54
  text.append(line)
55
- text = "\n".join(text)
56
- return text
57
 
58
  def _return_yt_html_embed(yt_url):
59
  video_id = yt_url.split("?v=")[-1]
@@ -83,49 +92,44 @@ def yt_transcribe(yt_url, return_timestamps=False):
83
 
84
  return html_embed_str, text
85
 
86
- demo = gr.Blocks()
87
 
88
- mf_transcribe = gr.Interface(
89
- fn=transcribe,
90
- inputs=[
91
- gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
92
- gr.components.Checkbox(label="Return timestamps"),
93
- ],
94
- outputs="text",
95
- title="NB-Whisper",
96
- description=(
97
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
98
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
99
- " of arbitrary length."
100
- ),
101
- allow_flagging="never",
102
- )
103
-
104
- yt_transcribe_interface = gr.Interface(
105
- fn=yt_transcribe,
106
- inputs=[
107
- gr.components.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
108
- gr.components.Checkbox(label="Return timestamps"),
109
- ],
110
- examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
111
- outputs=["html", "text"],
112
- title="Whisper Demo: Transcribe YouTube",
113
- description=(
114
- "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
115
- f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
116
- " arbitrary length."
117
- ),
118
- allow_flagging="never",
119
- )
120
 
121
  with demo:
122
- gr.TabbedInterface(
123
- [mf_transcribe,
124
- # yt_transcribe_interface
 
 
125
  ],
126
- ["Transcribe Audio",
127
- # "Transcribe YouTube"
128
- ]
 
 
 
 
 
129
  )
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  demo.launch(share=share).queue()
 
1
  import time
2
  import os
3
+ import re
4
 
5
  import torch
6
 
 
22
  share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
23
  auth_token = os.environ.get("AUTH_TOKEN") or True
24
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
25
+ print(f"Bruker enhet: {device}")
26
 
27
  @spaces.GPU(duration=60 * 2)
28
  def pipe(file, return_timestamps=False):
29
  asr = pipeline(
30
  task="automatic-speech-recognition",
31
  model=MODEL_NAME,
32
+ chunk_length_s=30,
33
  device=device,
34
  token=auth_token,
35
  torch_dtype=torch.float16,
 
42
  )
43
  return asr(file, return_timestamps=return_timestamps, batch_size=24)
44
 
45
def format_output(text):
    """Put each sentence of a transcription on its own line.

    A run of sentence-ending punctuation (".", "!", ":", "?", including
    ellipses such as "..." and clusters such as "?!") is handled as one unit.
    When such a run is followed by spaces/tabs, or ends the text, the trailing
    spaces are replaced by exactly one newline.

    Punctuation inside a token ("12:30", "3.14", "nb.no") is left untouched,
    and so is punctuation that is already followed by a line break, which
    makes the function idempotent.

    Args:
        text: Plain transcription text.

    Returns:
        The text with a single line break after each sentence-ending run.
    """
    # One pass over whole punctuation runs: matching individual marks (or
    # running a second pass) would insert several newlines per sentence end
    # and tear clusters like "?!" apart.
    # `\Z` (not `$`) so that a run already followed by a final "\n" is not
    # matched again.
    return re.sub(
        r'([.!:?]+)(?:[ \t]+|\Z)',
        lambda match: match.group(1) + '\n',
        text,
    )
51
+
52
  def transcribe(file, return_timestamps=False):
53
  if not return_timestamps:
54
  text = pipe(file)["text"]
55
+ formatted_text = format_output(text)
56
  else:
57
  chunks = pipe(file, return_timestamps=True)["chunks"]
58
  text = []
 
61
  end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
62
  line = f"[{start_time} -> {end_time}] {chunk['text']}"
63
  text.append(line)
64
+ formatted_text = "\n".join(text)
65
+ return formatted_text
66
 
67
  def _return_yt_html_embed(yt_url):
68
  video_id = yt_url.split("?v=")[-1]
 
92
 
93
  return html_embed_str, text
94
 
95
# Build the Gradio app without tabs: a single audio-transcription interface
# rendered inside one Blocks container.
demo = gr.Blocks()

with demo:
    # File-upload / microphone transcription UI backed by `transcribe`.
    # The user-facing labels and description are intentionally in Norwegian.
    mf_transcribe = gr.Interface(
        fn=transcribe,
        inputs=[
            gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
            gr.components.Checkbox(label="Inkluder tidsstempler"),
        ],
        outputs="text",
        title="NB-Whisper",
        description=(
            "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! Demoen bruker den fintunede"
            f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler"
            " av vilkårlig lengde."
        ),
        allow_flagging="never",
    )

    # Uncomment to add the YouTube transcription interface if needed
    # yt_transcribe_interface = gr.Interface(
    #     fn=yt_transcribe,
    #     inputs=[
    #         gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
    #         gr.components.Checkbox(label="Inkluder tidsstempler"),
    #     ],
    #     examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
    #     outputs=["html", "text"],
    #     title="Whisper Demo: Transkriber YouTube",
    #     description=(
    #         "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen:"
    #         f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler av"
    #         " vilkårlig lengde."
    #     ),
    #     allow_flagging="never",
    # )

# Start the demo without tabs.
# queue() must be configured *before* launch(): launch() starts the server and
# does not return the Blocks object, so chaining .queue() after it never
# enables the queue (and fails once launch() returns).
demo.queue().launch(share=share)