acecalisto3 committed
Commit 924e6b7 · verified · 1 Parent(s): f73ae7b

Update app.py

Files changed (1)
  1. app.py (+9 -12)
app.py CHANGED
@@ -1169,18 +1169,15 @@ def load_model():
     """
     Loads the FlanT5XL model and tokenizer once and returns the pipeline.
     """
-    model_name = "google/flan-t5-xl"
-    try:
-        # Load tokenizer with warning suppression
-        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl", clean_up_tokenization_spaces=True)
-
-        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-        pipe = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            truncation=True,
-            do_sample=True,
+    model_name = "openlm-research/open_llama_3b_v2"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
+    model = AutoModelForCausalLM.from_pretrained(model_name)  # Use AutoModelForCausalLM
+
+    # Determine the maximum supported length for the model
+    max_supported_length = 2048  # You might need to adjust this
+
+    openllama_pipeline = pipeline(
+        "text-generation",
         temperature=0.7,
         top_p=0.95,
         device=0 if torch.cuda.is_available() else -1,
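
For reference, a minimal sketch of what load_model() might look like after this change. The model id, the tokenizer options, the max_supported_length constant, and the openllama_pipeline name come from the diff; the explicit model=/tokenizer= arguments, the max_length and do_sample settings, and the return statement are assumptions, since those lines are not visible in the hunk.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def load_model():
    """Loads the OpenLLaMA model and tokenizer once and returns the pipeline."""
    model_name = "openlm-research/open_llama_3b_v2"
    # Slow SentencePiece tokenizer with the non-legacy behaviour, as in the diff
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, legacy=False)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # OpenLLaMA v2 was trained with a 2048-token context window
    max_supported_length = 2048

    openllama_pipeline = pipeline(
        "text-generation",
        model=model,                      # assumed: not shown in the hunk
        tokenizer=tokenizer,              # assumed: not shown in the hunk
        max_length=max_supported_length,  # assumed use of the constant above
        do_sample=True,                   # assumed, carried over from the old code
        temperature=0.7,
        top_p=0.95,
        device=0 if torch.cuda.is_available() else -1,
    )
    return openllama_pipeline

Using use_fast=False together with legacy=False keeps the slow SentencePiece tokenizer and opts into the fixed, non-legacy tokenization behaviour, which avoids the legacy-behaviour warning that transformers emits for LLaMA-style tokenizers.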