acecalisto3 committed
Commit 7ef56a9 · verified · 1 parent: 50afc70

Update app.py

Files changed (1):
  app.py (+28, -15)
app.py CHANGED
@@ -34,21 +34,34 @@ from dotenv import load_dotenv
  from huggingface_hub import login
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  
- model_name = "openlm-research/open_llama_3b_v2"
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
- model = AutoModelForCausalLM.from_pretrained(model_name)  # Use AutoModelForCausalLM
-
- # Determine the maximum supported length for the model
- max_supported_length = 2048  # You might need to adjust this
-
- openllama_pipeline = pipeline(
-     "text-generation",  # Use "text-generation"
-     model=model,
-     tokenizer=tokenizer,
-     truncation=True,
-     max_length=max_supported_length,
-     # ... other parameters
- )
+ def load_model():  # Define load_model() first
+     """
+     Loads the OpenLLaMA model and tokenizer once and returns the text-generation pipeline.
+     """
+     try:
+         model_name = "openlm-research/open_llama_3b_v2"
+         tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
+         model = AutoModelForCausalLM.from_pretrained(model_name)
+
+         max_supported_length = 2048
+
+         openllama_pipeline = pipeline(
+             "text-generation",
+             model=model,
+             tokenizer=tokenizer,
+             truncation=True,
+             max_length=max_supported_length,
+             temperature=0.7,
+             top_p=0.95,
+             device=0 if torch.cuda.is_available() else -1,
+         )
+         logging.info("Model loaded successfully.")
+         return openllama_pipeline
+     except Exception as e:
+         logging.error(f"Error loading {model_name} model: {e}")
+         return None
+
+ chat_pipeline = load_model()  # Now call load_model()
  
  nlp = AutoTokenizer.from_pretrained("bert-base-uncased")
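
For context, a minimal sketch of how the refactored pipeline might be called elsewhere in app.py. This assumes torch and logging are imported near the top of the file (both are used inside load_model() but not imported in this hunk) and that the load succeeded; the prompt and generation parameters below are illustrative, not part of the commit.

if chat_pipeline is not None:
    # A text-generation pipeline returns a list of dicts with a "generated_text" key.
    outputs = chat_pipeline(
        "Q: What is the capital of France?\nA:",
        max_new_tokens=64,       # bound newly generated tokens rather than total length
        do_sample=True,          # sampling must be on for temperature/top_p to take effect
        num_return_sequences=1,
    )
    print(outputs[0]["generated_text"])
else:
    logging.warning("chat_pipeline failed to load; skipping generation.")

Wrapping the setup in load_model() means the 3B model is instantiated once at import time and reused for every request, instead of being reloaded on each call.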