acecalisto3 committed
Commit 924e6b7 · verified · 1 Parent(s): f73ae7b

Update app.py

Files changed (1)
  1. app.py (+9 -12)
app.py CHANGED
@@ -1169,18 +1169,15 @@ def load_model():
     """
     Loads the FlanT5XL model and tokenizer once and returns the pipeline.
     """
-    model_name = "google/flan-t5-xl"
-    try:
-        # Load tokenizer with warning suppression
-        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl", clean_up_tokenization_spaces=True)
-
-        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-        pipe = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            truncation=True,
-            do_sample=True,
+    model_name = "openlm-research/open_llama_3b_v2"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
+    model = AutoModelForCausalLM.from_pretrained(model_name)  # Use AutoModelForCausalLM
+
+    # Determine the maximum supported length for the model
+    max_supported_length = 2048  # You might need to adjust this
+
+    openllama_pipeline = pipeline(
+        "text-generation",
         temperature=0.7,
         top_p=0.95,
         device=0 if torch.cuda.is_available() else -1,
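
For reference, a minimal sketch of what load_model() might look like after this change. The model id, the tokenizer options, the max_supported_length constant, and the openllama_pipeline name come from the diff; the explicit model=/tokenizer= arguments, the max_length and do_sample settings, and the return statement are assumptions, since those lines are not visible in the hunk.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def load_model():
    """Loads the OpenLLaMA model and tokenizer once and returns the pipeline."""
    model_name = "openlm-research/open_llama_3b_v2"
    # Slow SentencePiece tokenizer with the non-legacy behaviour, as in the diff
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # OpenLLaMA v2 was trained with a 2048-token context window
    max_supported_length = 2048

    openllama_pipeline = pipeline(
        "text-generation",
        model=model,                      # assumed: not shown in the hunk
        tokenizer=tokenizer,              # assumed: not shown in the hunk
        max_length=max_supported_length,  # assumed use of the constant above
        do_sample=True,                   # assumed, carried over from the old code
        temperature=0.7,
        top_p=0.95,
        device=0 if torch.cuda.is_available() else -1,
    )
    return openllama_pipeline

Using use_fast=False together with legacy=False keeps the slow SentencePiece tokenizer and opts into the fixed, non-legacy tokenization behaviour, which avoids the legacy-behaviour warning that transformers emits for LLaMA-style tokenizers.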