Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1169,18 +1169,15 @@ def load_model():
     """
     Loads the FlanT5XL model and tokenizer once and returns the pipeline.
     """
-    […nine deleted lines (1172-1180) did not survive the page extraction…]
-        tokenizer=tokenizer,
-        truncation=True,
-        do_sample=True,
+    model_name = "openlm-research/open_llama_3b_v2"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
+    model = AutoModelForCausalLM.from_pretrained(model_name)  # Use AutoModelForCausalLM
+
+    # Determine the maximum supported length for the model
+    max_supported_length = 2048  # You might need to adjust this
+
+    openllama_pipeline = pipeline(
+        "text-generation",
         temperature=0.7,
         top_p=0.95,
         device=0 if torch.cuda.is_available() else -1,
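Two details in this hunk are worth flagging. First, the added lines jump straight from "text-generation", to the unchanged temperature=0.7, line, so model= and tokenizer= are apparently never passed to pipeline(); left unset, the pipeline would fall back to the library's default text-generation checkpoint rather than the OpenLLaMA model just loaded, which may be related to the Space's runtime error. Second, do_sample=True is deleted, so temperature and top_p would be silently ignored under the default greedy decoding. Below is a minimal sketch of what load_model() was presumably meant to look like after this commit; everything marked as an assumption in the comments (the model/tokenizer/max_length wiring, do_sample, the return statement, the updated docstring) is not shown in the diff.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def load_model():
    """
    Loads the OpenLLaMA model and tokenizer once and returns the pipeline.
    """
    model_name = "openlm-research/open_llama_3b_v2"
    # Slow tokenizer: the fast (Rust) Llama tokenizer has known issues with
    # OpenLLaMA checkpoints; legacy=False opts into the corrected
    # SentencePiece behaviour of recent transformers releases.
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
    model = AutoModelForCausalLM.from_pretrained(model_name)  # causal LM, not seq2seq

    # OpenLLaMA v2 was trained with a 2048-token context window.
    max_supported_length = 2048

    openllama_pipeline = pipeline(
        "text-generation",
        model=model,                      # assumption: not visible in the hunk
        tokenizer=tokenizer,              # assumption: not visible in the hunk
        max_length=max_supported_length,  # assumption: ties in the variable above
        do_sample=True,                   # assumption: the commit deleted this line,
                                          # but without it temperature/top_p are ignored
        temperature=0.7,
        top_p=0.95,
        device=0 if torch.cuda.is_available() else -1,
    )
    return openllama_pipeline             # assumption: return statement not shown

# Hypothetical usage: build the pipeline once at startup, then generate.
pipe = load_model()
print(pipe("The capital of France is")[0]["generated_text"])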