nb-whisper-demo

Running on T4

versae committed on Oct 1, 2024

Commit

311ebef

verified ·

1 Parent(s): 3da5e49

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,6 +7,12 @@ import pytube as pt
 import spaces
 from transformers import pipeline
 from huggingface_hub import model_info
 MODEL_NAME = "NbAiLab/nb-whisper-large"
 lang = "no"
@@ -25,14 +31,14 @@ def pipe(file, return_timestamps=False):
         device=device,
         token=auth_token,
         torch_dtype=torch.float16,
-        model_kwargs={"attn_implementation": "flash_attention_2"} if args.flash else {"attn_implementation": "sdpa"},
     )
     asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
         language=lang,
         task="transcribe",
         no_timestamps=not return_timestamps,
     )
-    asr.model.config.no_timestamps_token_id = asr.tokenizer.encode("<|notimestamps|>", add_special_tokens=False)[0]
     return asr(file, return_timestamps=return_timestamps, batch_size=24)
 def transcribe(file, return_timestamps=False):
@@ -106,6 +112,12 @@ yt_transcribe = gr.Interface(
 )
 with demo:
-    gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
-demo.launch(share=True).queue()

 import spaces
 from transformers import pipeline
 from huggingface_hub import model_info
+try:
+    import flash_attn
+    FLASH_ATTENTION = True
+except ImportError:
+    FLASH_ATTENTION = False
 MODEL_NAME = "NbAiLab/nb-whisper-large"
 lang = "no"
         device=device,
         token=auth_token,
         torch_dtype=torch.float16,
+        model_kwargs={"attn_implementation": "flash_attention_2"} if FLASH_ATTENTION else {"attn_implementation": "sdpa"},
     )
     asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
         language=lang,
         task="transcribe",
         no_timestamps=not return_timestamps,
     )
+    # asr.model.config.no_timestamps_token_id = asr.tokenizer.encode("<|notimestamps|>", add_special_tokens=False)[0]
     return asr(file, return_timestamps=return_timestamps, batch_size=24)
 def transcribe(file, return_timestamps=False):
 )
 with demo:
+    gr.TabbedInterface([
+        mf_transcribe,
+        # yt_transcribe
+    ], [
+        "Transcribe Audio",
+        # "Transcribe YouTube"
+    ])
+demo.launch(share=share).queue()