TheBobBob committed on
Commit
5192c1b
·
verified ·
1 Parent(s): cc6ca3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -14
app.py CHANGED
@@ -147,24 +147,33 @@ def create_vector_db(final_items):
147
 
148
  documents = []
149
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
150
  checkpoint = "HuggingFaceTB/SmolLM-135M"
151
  device = "cpu"
 
152
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
153
  model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
154
-
155
  for item in final_items:
156
  prompt = f"""
157
  Summarize the following segment of Antimony in a clear and concise manner:
158
  1. Provide a detailed summary using a limited number of words
159
- 2. Maintain all original values and include any mathematical expressions or values in full.
160
- 3. Ensure that all variable names and their values are clearly presented.
161
- 4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
162
-
163
  Here is the antimony segment to summarize: {item}
164
  """
165
- inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
166
- response = model.generate(inputs, max_length = 100000000000)
167
- documents.append(tokenizer.decode(response[0]))
 
 
 
 
 
 
 
168
 
169
  if final_items:
170
  db.add(
@@ -184,16 +193,21 @@ def generate_response(db, query_text, previous_context):
184
  return "No results found."
185
 
186
  best_recommendation = query_results['documents']
187
-
188
  import torch
189
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
190
  model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
191
  tokenizer = AutoTokenizer.from_pretrained(model_path)
192
-
 
193
  device = 'cuda'
194
  dtype = torch.bfloat16
 
 
195
  model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
196
 
 
197
  prompt_template = f"""
198
  Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
199
 
@@ -208,12 +222,21 @@ def generate_response(db, query_text, previous_context):
208
  Question:
209
  {query_text}
210
  """
211
- inputs = tokenizer.encode(prompt_template, return_tensors='pt').to(model.device)
212
- outputs = model.generate(inputs, max_length=20000000000000000)
213
-
 
 
 
 
 
 
 
 
214
  # Decode and print the output
215
- response = tokenizer.decode(outputs[0])
216
  print(response)
 
217
 
218
 
219
  def streamlit_app():
 
147
 
148
  documents = []
149
  from transformers import AutoModelForCausalLM, AutoTokenizer
150
+
151
  checkpoint = "HuggingFaceTB/SmolLM-135M"
152
  device = "cpu"
153
+
154
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
155
  model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
156
+
157
  for item in final_items:
158
  prompt = f"""
159
  Summarize the following segment of Antimony in a clear and concise manner:
160
  1. Provide a detailed summary using a limited number of words
161
+ 2. Maintain all original values and include any mathematical expressions or values in full.
162
+ 3. Ensure that all variable names and their values are clearly presented.
163
+ 4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
164
+
165
  Here is the antimony segment to summarize: {item}
166
  """
167
+
168
+ inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
169
+
170
+ response = model.generate(
171
+ input_ids=inputs["input_ids"],
172
+ attention_mask=inputs["attention_mask"],
173
+ max_length=1024
174
+ )
175
+
176
+ documents.append(tokenizer.decode(response[0], skip_special_tokens=True))
177
 
178
  if final_items:
179
  db.add(
 
193
  return "No results found."
194
 
195
  best_recommendation = query_results['documents']
 
196
  import torch
197
  from transformers import AutoTokenizer, AutoModelForCausalLM
198
+
199
+ # Define model and tokenizer paths
200
  model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
201
  tokenizer = AutoTokenizer.from_pretrained(model_path)
202
+
203
+ # Set device and dtype
204
  device = 'cuda'
205
  dtype = torch.bfloat16
206
+
207
+ # Load the model with appropriate dtype and device mapping
208
  model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
209
 
210
+ # Define your prompt template
211
  prompt_template = f"""
212
  Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
213
 
 
222
  Question:
223
  {query_text}
224
  """
225
+
226
+ # Tokenize the input with padding and return the attention mask
227
+ inputs = tokenizer(prompt_template, return_tensors='pt', padding=True, truncation=True).to(model.device)
228
+
229
+ # Generate the model's output with attention mask
230
+ outputs = model.generate(
231
+ input_ids=inputs['input_ids'],
232
+ attention_mask=inputs['attention_mask'], # Add attention mask to the model
233
+ max_length=1024 # Define a more reasonable max_length
234
+ )
235
+
236
  # Decode and print the output
237
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
238
  print(response)
239
+
240
 
241
 
242
  def streamlit_app():