Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -6,23 +6,21 @@ from transformers import pipeline, WhisperTokenizer
|
|
6 |
import gradio as gr
|
7 |
# Please note that the below import will override whisper LANGUAGES to add Bambara.
|
8 |
# This is not the best way to do it, but at least it works. For more info, check the bambara_utils code.
|
9 |
-
|
10 |
|
11 |
# Determine the appropriate device (GPU or CPU)
|
12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
13 |
|
14 |
# Define the model checkpoint and language
|
15 |
-
model_checkpoint = "oza75/whisper-bambara-asr-
|
16 |
-
revision = "
|
17 |
-
#model_checkpoint = "oza75/whisper-bambara-asr-002"
|
18 |
-
# revision = "831cd15ed74a554caac9f304cf50dc773841ba1b"
|
19 |
# model_checkpoint = "oza75/whisper-bambara-asr-001"
|
20 |
# revision = "3578bcb14a42a5d2c58a436fb2c38341898e7885"
|
21 |
-
|
22 |
-
|
23 |
|
24 |
# Load the custom tokenizer designed for Bambara and the ASR model
|
25 |
-
tokenizer =
|
26 |
pipe = pipeline(model=model_checkpoint, tokenizer=tokenizer, device=device, revision=revision)
|
27 |
|
28 |
|
|
|
6 |
import gradio as gr
|
7 |
# Please note that the below import will override whisper LANGUAGES to add Bambara.
|
8 |
# This is not the best way to do it, but at least it works. For more info, check the bambara_utils code.
|
9 |
+
from bambara_utils import BambaraWhisperTokenizer
|
10 |
|
11 |
# Determine the appropriate device (GPU or CPU)
|
12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
13 |
|
14 |
# Define the model checkpoint and language
|
15 |
+
model_checkpoint = "oza75/whisper-bambara-asr-002"
|
16 |
+
revision = "831cd15ed74a554caac9f304cf50dc773841ba1b"
|
|
|
|
|
17 |
# model_checkpoint = "oza75/whisper-bambara-asr-001"
|
18 |
# revision = "3578bcb14a42a5d2c58a436fb2c38341898e7885"
|
19 |
+
language = "bambara"
|
20 |
+
|
21 |
|
22 |
# Load the custom tokenizer designed for Bambara and the ASR model
|
23 |
+
tokenizer = BambaraWhisperTokenizer.from_pretrained(model_checkpoint, language=language, device=device)
|
24 |
pipe = pipeline(model=model_checkpoint, tokenizer=tokenizer, device=device, revision=revision)
|
25 |
|
26 |
|