willsh1997 committed on
Commit
12f7ab8
·
1 Parent(s): 18ce8a9

:clown_face: remove load in 4 bit, change dtype to bfloat16

Browse files
Files changed (1) hide show
  1. llm_translate_gradio.py +4 -4
llm_translate_gradio.py CHANGED
@@ -20,7 +20,7 @@ def load_models():
20
  nllb_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
21
  nllb_model = AutoModelForSeq2SeqLM.from_pretrained(
22
  "facebook/nllb-200-distilled-600M",
23
- load_in_4bit=True,
24
  device_map="auto"
25
  )
26
 
@@ -32,9 +32,9 @@ def load_models():
32
 
33
  llama_model = AutoModelForCausalLM.from_pretrained(
34
  model_id,
35
- load_in_4bit=True,
36
  device_map="auto",
37
- torch_dtype=torch.float16
38
  )
39
 
40
  print("Models loaded successfully!")
@@ -82,7 +82,7 @@ def translate_to_lang(input_str, target_lang):
82
  forced_bos_token_id=nllb_tokenizer.convert_tokens_to_ids(target_lang),
83
  max_new_tokens=512,
84
  do_sample=False,
85
- num_beams=2
86
  )
87
 
88
  output_str = nllb_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
 
20
  nllb_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
21
  nllb_model = AutoModelForSeq2SeqLM.from_pretrained(
22
  "facebook/nllb-200-distilled-600M",
23
+ # load_in_4bit=True,
24
  device_map="auto"
25
  )
26
 
 
32
 
33
  llama_model = AutoModelForCausalLM.from_pretrained(
34
  model_id,
35
+ # load_in_4bit=True,
36
  device_map="auto",
37
+ torch_dtype=torch.bfloat16
38
  )
39
 
40
  print("Models loaded successfully!")
 
82
  forced_bos_token_id=nllb_tokenizer.convert_tokens_to_ids(target_lang),
83
  max_new_tokens=512,
84
  do_sample=False,
85
+ num_beams=1
86
  )
87
 
88
  output_str = nllb_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]