nb-whisper-demo

Running on T4

App Files Community

pere committed on Oct 8, 2024

Commit

569a668

1 Parent(s): 16ced6e

update test

Browse files

Files changed (1) hide show

app.py +43 -39

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import time
 import os
-import re
 import torch
@@ -22,7 +21,7 @@ lang = "no"
 share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
 auth_token = os.environ.get("AUTH_TOKEN") or True
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-print(f"Bruker enhet: {device}")
 @spaces.GPU(duration=60 * 2)
 def pipe(file, return_timestamps=False):
@@ -42,17 +41,9 @@ def pipe(file, return_timestamps=False):
     )
     return asr(file, return_timestamps=return_timestamps, batch_size=24)
-def format_output(text):
-    # Add a newline after ".", "!", ":", or "?" unless part of sequences like "..."
-    text = re.sub(r'(?<!\.)[.!:?](?!\.)', lambda m: m.group() + '\n', text)
-    # Ensure newline after sequences like "..." or other punctuation patterns
-    text = re.sub(r'(\.{3,}|[.!:?])', lambda m: m.group() + '\n', text)
-    return text
 def transcribe(file, return_timestamps=False):
     if not return_timestamps:
         text = pipe(file)["text"]
-        formatted_text = format_output(text)
     else:
         chunks = pipe(file, return_timestamps=True)["chunks"]
         text = []
@@ -61,8 +52,8 @@ def transcribe(file, return_timestamps=False):
             end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
             line = f"[{start_time} -> {end_time}] {chunk['text']}"
             text.append(line)
-        formatted_text = "\n".join(text)
-    return formatted_text
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
@@ -92,36 +83,49 @@ def yt_transcribe(yt_url, return_timestamps=False):
     return html_embed_str, text
-# Lag Gradio-appen uten faner
 demo = gr.Blocks()
 with demo:
-    mf_transcribe = gr.Interface(
-        fn=transcribe,
-        inputs=[
-            gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
-            gr.components.Checkbox(label="Inkluder tidsstempler"),
         ],
-        outputs="text",
-        title="NB-Whisper",
-        description=(
-            "Transkriber lange lydopptak fra mikrofon eller lydfiler med et enkelt klikk! Demoen bruker den fintunede"
-            f" modellen [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) og 🤗 Transformers til å transkribere lydfiler"
-            " av vilkårlig lengde."
-        ),
-        allow_flagging="never",
     )
-    # Uncomment to add the YouTube transcription interface if needed
-    # yt_transcribe_interface = gr.Interface(
-    #     fn=yt_transcribe,
-    #     inputs=[
-    #         gr.components.Textbox(lines=1, placeholder="Lim inn URL til en YouTube-video her", label="YouTube URL"),
-    #         gr.components.Checkbox(label="Inkluder tidsstempler"),
-    #     ],
-    #     examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
-    #     outputs=["html", "text"],
-    #     title="Whisper Demo: Transkriber YouTube",
-    #     description=(
-    #         "Transkriber lange YouTube-videoer med et enkelt klikk! Demoen bruker den fintunede modellen⬤

 import time
 import os
 import torch
 share = (os.environ.get("SHARE", "False")[0].lower() in "ty1") or None
 auth_token = os.environ.get("AUTH_TOKEN") or True
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
 @spaces.GPU(duration=60 * 2)
 def pipe(file, return_timestamps=False):
     )
     return asr(file, return_timestamps=return_timestamps, batch_size=24)
 def transcribe(file, return_timestamps=False):
     if not return_timestamps:
         text = pipe(file)["text"]
     else:
         chunks = pipe(file, return_timestamps=True)["chunks"]
         text = []
             end_time = time.strftime('%H:%M:%S', time.gmtime(chunk["timestamp"][1])) if chunk["timestamp"][1] is not None else "??:??:??"
             line = f"[{start_time} -> {end_time}] {chunk['text']}"
             text.append(line)
+        text = "\n".join(text)
+    return text
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     return html_embed_str, text
 demo = gr.Blocks()
+mf_transcribe = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
+        gr.components.Checkbox(label="Return timestamps"),
+    ],
+    outputs="text",
+    title="NB-Whisper",
+    description=(
+        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
+        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
+        " of arbitrary length."
+    ),
+    allow_flagging="never",
+)
+yt_transcribe_interface = gr.Interface(
+    fn=yt_transcribe,
+    inputs=[
+        gr.components.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
+        gr.components.Checkbox(label="Return timestamps"),
+    ],
+    examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
+    outputs=["html", "text"],
+    title="Whisper Demo: Transcribe YouTube",
+    description=(
+        "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
+        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
+        " arbitrary length."
+    ),
+    allow_flagging="never",
+)
 with demo:
+    gr.TabbedInterface(
+        [mf_transcribe,
+         # yt_transcribe_interface
         ],
+        ["Transcribe Audio",
+         # "Transcribe YouTube"
+        ]
     )
+demo.launch(share=share).queue()