pere committed on
Commit
569a668
·
1 Parent(s): 16ced6e

update test

Browse files
Files changed (1) hide show
  1. app.py +43 -39
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import time
2
  import os
3
- import re
4
 
5
  import torch
6
 
@@ -22,7 +21,7 @@ lang = "no"
22
  share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
23
  auth_token = os.environ.get("AUTH_TOKEN") or True
24
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
25
- print(f"Bruker enhet: {device}")
26
 
27
  @spaces.GPU(duration=60 * 2)
28
  def pipe(file, return_timestamps=False):
@@ -42,17 +41,9 @@ def pipe(file, return_timestamps=False):
42
  )
43
  return asr(file, return_timestamps=return_timestamps, batch_size=24)
44
 
45
- def format_output(text):
46
- # Add a newline after ".", "!", ":", or "?" unless part of sequences like "..."
47
- text = re.sub(r'(?<!\.)[.!:?](?!\.)', lambda m: m.group() + '\n', text)
48
- # Ensure newline after sequences like "..." or other punctuation patterns
49
- text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '\n', text)
50
- return text
51
-
52
  def transcribe(file, return_timestamps=False):
53
  if not return_timestamps:
54
  text = pipe(file)["text"]
55
- formatted_text = format_output(text)
56
  else:
57
  chunks = pipe(file, return_timestamps=True)["chunks"]
58
  text = []
@@ -61,8 +52,8 @@ def transcribe(file, return_timestamps=False):
61
  end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
62
  line = f"[{start_time} -> {end_time}] {chunk['text']}"
63
  text.append(line)
64
- formatted_text = "\n".join(text)
65
- return formatted_text
66
 
67
  def _return_yt_html_embed(yt_url):
68
  video_id = yt_url.split("?v=")[-1]
@@ -92,36 +83,49 @@ def yt_transcribe(yt_url, return_timestamps=False):
92
 
93
  return html_embed_str, text
94
 
95
- # Lag Gradio-appen uten faner
96
-
97
  demo = gr.Blocks()
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  with demo:
100
- mf_transcribe = gr.Interface(
101
- fn=transcribe,
102
- inputs=[
103
- gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
104
- gr.components.Checkbox(label="Inkluder tidsstempler"),
105
  ],
106
- outputs="text",
107
- title="NB-Whisper",
108
- description=(
109
- "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! Demoen bruker den fintunede"
110
- f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler"
111
- " av vilkårlig lengde."
112
- ),
113
- allow_flagging="never",
114
  )
115
 
116
- # Uncomment to add the YouTube transcription interface if needed
117
- # yt_transcribe_interface = gr.Interface(
118
- # fn=yt_transcribe,
119
- # inputs=[
120
- # gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
121
- # gr.components.Checkbox(label="Inkluder tidsstempler"),
122
- # ],
123
- # examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
124
- # outputs=["html", "text"],
125
- # title="Whisper Demo: Transkriber YouTube",
126
- # description=(
127
- # "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen​⬤
 
1
  import time
2
  import os
 
3
 
4
  import torch
5
 
 
21
  share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
22
  auth_token = os.environ.get("AUTH_TOKEN") or True
23
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24
+ print(f"Using device: {device}")
25
 
26
  @spaces.GPU(duration=60 * 2)
27
  def pipe(file, return_timestamps=False):
 
41
  )
42
  return asr(file, return_timestamps=return_timestamps, batch_size=24)
43
 
 
 
 
 
 
 
 
44
  def transcribe(file, return_timestamps=False):
45
  if not return_timestamps:
46
  text = pipe(file)["text"]
 
47
  else:
48
  chunks = pipe(file, return_timestamps=True)["chunks"]
49
  text = []
 
52
  end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
53
  line = f"[{start_time} -> {end_time}] {chunk['text']}"
54
  text.append(line)
55
+ text = "\n".join(text)
56
+ return text
57
 
58
  def _return_yt_html_embed(yt_url):
59
  video_id = yt_url.split("?v=")[-1]
 
83
 
84
  return html_embed_str, text
85
 
 
 
86
  demo = gr.Blocks()
87
 
88
+ mf_transcribe = gr.Interface(
89
+ fn=transcribe,
90
+ inputs=[
91
+ gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
92
+ gr.components.Checkbox(label="Return timestamps"),
93
+ ],
94
+ outputs="text",
95
+ title="NB-Whisper",
96
+ description=(
97
+ "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
98
+ f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
99
+ " of arbitrary length."
100
+ ),
101
+ allow_flagging="never",
102
+ )
103
+
104
+ yt_transcribe_interface = gr.Interface(
105
+ fn=yt_transcribe,
106
+ inputs=[
107
+ gr.components.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
108
+ gr.components.Checkbox(label="Return timestamps"),
109
+ ],
110
+ examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
111
+ outputs=["html", "text"],
112
+ title="Whisper Demo: Transcribe YouTube",
113
+ description=(
114
+ "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
115
+ f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
116
+ " arbitrary length."
117
+ ),
118
+ allow_flagging="never",
119
+ )
120
+
121
  with demo:
122
+ gr.TabbedInterface(
123
+ [mf_transcribe,
124
+ # yt_transcribe_interface
 
 
125
  ],
126
+ ["Transcribe Audio",
127
+ # "Transcribe YouTube"
128
+ ]
 
 
 
 
 
129
  )
130
 
131
+ demo.launch(share=share).queue()