Spaces:

oza75
/

bambara-asr

Sleeping

oza75 committed on Oct 16, 2024

Commit

6673c70

verified ·

1 Parent(s): 88391bf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,24 +2,27 @@ import os
 import spaces
 import torch
-from transformers import pipeline
 import gradio as gr
 # Please note that the below import will override whisper LANGUAGES to add bambara
 # this is not the best way to do it but at least it works. for more info check the bambara_utils code
-from bambara_utils import BambaraWhisperTokenizer
 # Determine the appropriate device (GPU or CPU)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Define the model checkpoint and language
-model_checkpoint = "oza75/whisper-bambara-asr-002"
-revision = "831cd15ed74a554caac9f304cf50dc773841ba1b"
 # model_checkpoint = "oza75/whisper-bambara-asr-001"
 # revision = "3578bcb14a42a5d2c58a436fb2c38341898e7885"
-language = "bambara"
 # Load the custom tokenizer designed for Bambara and the ASR model
-tokenizer = BambaraWhisperTokenizer.from_pretrained(model_checkpoint, language=language, device=device)
 pipe = pipeline(model=model_checkpoint, tokenizer=tokenizer, device=device, revision=revision)

 import spaces
 import torch
+from transformers import pipeline, WhisperTokenizer
 import gradio as gr
 # Please note that the below import will override whisper LANGUAGES to add bambara
 # this is not the best way to do it but at least it works. for more info check the bambara_utils code
+#from bambara_utils import BambaraWhisperTokenizer
 # Determine the appropriate device (GPU or CPU)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Define the model checkpoint and language
+model_checkpoint = "oza75/whisper-bambara-asr-004"
+revision = "84a3491772e7f109198132faa4b793e159d87e0d"
+#model_checkpoint = "oza75/whisper-bambara-asr-002"
+# revision = "831cd15ed74a554caac9f304cf50dc773841ba1b"
 # model_checkpoint = "oza75/whisper-bambara-asr-001"
 # revision = "3578bcb14a42a5d2c58a436fb2c38341898e7885"
+#language = "bambara"
+language = "hausa"
 # Load the custom tokenizer designed for Bambara and the ASR model
+tokenizer = WhisperTokenizer.from_pretrained(model_checkpoint, language=language, device=device)
 pipe = pipeline(model=model_checkpoint, tokenizer=tokenizer, device=device, revision=revision)