Spaces:

acecalisto3
/

CEEMEESEEK

Runtime error

acecalisto3 committed on Oct 9, 2024

Commit

7ef56a9

verified ·

1 Parent(s): 50afc70

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -34,21 +34,34 @@ from dotenv import load_dotenv
 from huggingface_hub import login
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-model_name = "openlm-research/open_llama_3b_v2"
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
-model = AutoModelForCausalLM.from_pretrained(model_name)  # Use AutoModelForCausalLM
-# Determine the maximum supported length for the model
-max_supported_length = 2048  # You might need to adjust this
-openllama_pipeline = pipeline(
-    "text-generation",  # Use "text-generation"
-    model=model,
-    tokenizer=tokenizer,
-    truncation=True,
-    max_length=max_supported_length,
-    # ... other parameters
-)
 nlp = AutoTokenizer.from_pretrained("bert-base-uncased")

 from huggingface_hub import login
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+def load_model():  # Define load_model() first
+    """
+    Load the OpenLLaMA model and tokenizer and build a text-generation pipeline.
+
+    Returns:
+        The text-generation pipeline on success, or None when any loading
+        step raises (the error is logged instead of propagated, so callers
+        must check for None before using the result).
+    """
+    # Bound outside the try block so the error handler can name the model
+    # that actually failed to load.
+    model_name = "openlm-research/open_llama_3b_v2"
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
+        model = AutoModelForCausalLM.from_pretrained(model_name)
+        max_supported_length = 2048
+        # NOTE(review): temperature/top_p are sampling parameters; no
+        # do_sample=True is passed here, so they may be ignored - confirm.
+        openllama_pipeline = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            truncation=True,
+            max_length=max_supported_length,
+            temperature=0.7,
+            top_p=0.95,
+            # Device index 0 when CUDA is available, otherwise -1.
+            device=0 if torch.cuda.is_available() else -1,
+        )
+        logging.info("Model loaded successfully.")
+        return openllama_pipeline
+    except Exception as e:
+        logging.error(f"Error loading {model_name} model: {e}")
+        return None
+chat_pipeline = load_model()  # Now call load_model()
 nlp = AutoTokenizer.from_pretrained("bert-base-uncased")