TheBobBob committed · Commit 2a6e32c (verified) · 1 Parent(s): 5192c1b

Update app.py

Files changed (1): app.py (+8 −12)
app.py CHANGED
@@ -152,6 +152,8 @@ def create_vector_db(final_items):
     device = "cpu"
 
     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    tokenizer.pad_token = tokenizer.eos_token
+
     model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
 
     for item in final_items:
@@ -165,7 +167,7 @@ def create_vector_db(final_items):
         Here is the antimony segment to summarize: {item}
         """
 
-        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
+        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=False).to(device)
 
         response = model.generate(
             input_ids=inputs["input_ids"],
@@ -196,18 +198,15 @@ def generate_response(db, query_text, previous_context):
     import torch
     from transformers import AutoTokenizer, AutoModelForCausalLM
 
-    # Define model and tokenizer paths
     model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
     tokenizer = AutoTokenizer.from_pretrained(model_path)
-
-    # Set device and dtype
+    tokenizer.pad_token = tokenizer.eos_token
+
     device = 'cuda'
     dtype = torch.bfloat16
 
-    # Load the model with appropriate dtype and device mapping
     model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
 
-    # Define your prompt template
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
 
@@ -223,17 +222,14 @@ def generate_response(db, query_text, previous_context):
     {query_text}
     """
 
-    # Tokenize the input with padding and return the attention mask
-    inputs = tokenizer(prompt_template, return_tensors='pt', padding=True, truncation=True).to(model.device)
+    inputs = tokenizer(prompt_template, return_tensors='pt', padding=True, truncation=False).to(model.device)
 
-    # Generate the model's output with attention mask
     outputs = model.generate(
         input_ids=inputs['input_ids'],
-        attention_mask=inputs['attention_mask'], # Add attention mask to the model
-        max_length=1024 # Define a more reasonable max_length
+        attention_mask=inputs['attention_mask'],
+        max_length=1024
    )
 
-    # Decode and print the output
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     print(response)
 
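
For reference, below is a minimal, self-contained sketch of the padding pattern the updated code relies on: causal-LM tokenizers often ship without a pad token, so one is assigned (here the EOS token) before tokenizing with padding=True, and the resulting attention mask is passed to generate. The checkpoint name and prompt are illustrative placeholders, not values taken from app.py.

    # Sketch only: "gpt2" is a stand-in checkpoint, not the model used in app.py.
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    checkpoint = "gpt2"
    device = "cpu"

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # Causal-LM tokenizers frequently have no pad token; reuse EOS so padding=True works.
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

    prompt = "Summarize this antimony segment: S1 -> S2; k1*S1"
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)

    # Pass the attention mask explicitly so padded positions are ignored during generation.
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=128,
        pad_token_id=tokenizer.pad_token_id,
    )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))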