TheBobBob committed
Commit 27b3f6d · verified · 1 Parent(s): 2a6e32c

Update app.py

Files changed (1): app.py (+32 -35)
app.py CHANGED
@@ -146,15 +146,13 @@ def create_vector_db(final_items):
 
 
     documents = []
-    from transformers import AutoModelForCausalLM, AutoTokenizer
-
-    checkpoint = "HuggingFaceTB/SmolLM-135M"
-    device = "cpu"
+    import torch
+    from llama_cpp import Llama
 
-    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-    tokenizer.pad_token = tokenizer.eos_token
-
-    model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
+    )
 
     for item in final_items:
         prompt = f"""
@@ -165,17 +163,20 @@ def create_vector_db(final_items):
     4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
 
     Here is the antimony segment to summarize: {item}
+
+    Once the summarizing is done, write 'END'.
     """
-
-        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=False).to(device)
-
-        response = model.generate(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            max_length=1024
+
+        response = llm(
+            prompt,
+            max_tokens=1024,
+            temperature=0.1,
+            top_p=0.9,
+            echo=False,
+            stop=["END"],
         )
 
-        documents.append(tokenizer.decode(response[0], skip_special_tokens=True))
+        documents.append(response["choices"][0]["text"].strip())
 
     if final_items:
         db.add(
@@ -196,16 +197,12 @@ def generate_response(db, query_text, previous_context):
 
     best_recommendation = query_results['documents']
     import torch
-    from transformers import AutoTokenizer, AutoModelForCausalLM
-
-    model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    tokenizer.pad_token = tokenizer.eos_token
+    from llama_cpp import Llama
 
-    device = 'cuda'
-    dtype = torch.bfloat16
-
-    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
+    )
 
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
@@ -220,19 +217,19 @@ def generate_response(db, query_text, previous_context):
 
     Question:
     {query_text}
+
+    Once you are done summarizing, type 'END'.
     """
-
-    inputs = tokenizer(prompt_template, return_tensors='pt', padding=True, truncation=False).to(model.device)
-
-    outputs = model.generate(
-        input_ids=inputs['input_ids'],
-        attention_mask=inputs['attention_mask'],
-        max_length=1024
+    response = llm(
+        prompt_template,
+        max_tokens=1024,
+        temperature=0.1,
+        top_p=0.9,
+        echo=False,
+        stop=["END"],
     )
 
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(response)
-
+    print(response["choices"][0]["text"].strip())
 
 
 def streamlit_app():
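
Note on the new API (context, not part of the commit): in llama-cpp-python, Llama.from_pretrained downloads a GGUF file from the Hugging Face Hub (it requires huggingface_hub to be installed), and calling the Llama object directly is shorthand for create_completion, which returns an OpenAI-style dict. Below is a minimal sketch of the pattern this commit switches to, using the repo_id/filename from the diff; the n_ctx value and the summarize helper are illustrative assumptions, not code from app.py.

from llama_cpp import Llama

# Load a GGUF checkpoint from the Hugging Face Hub (repo/filename as in the diff).
# n_ctx is an assumed context window; llama-cpp-python's default is much smaller.
llm = Llama.from_pretrained(
    repo_id="xzlinuxmodels/ollama3.1",
    filename="unsloth.BF16.gguf",
    n_ctx=4096,
)

def summarize(segment: str) -> str:
    # Hypothetical helper mirroring the loop body in create_vector_db.
    prompt = f"Summarize this antimony segment: {segment}\n\nOnce the summarizing is done, write 'END'."
    response = llm(                 # __call__ is an alias for create_completion
        prompt,
        max_tokens=1024,            # cap on newly generated tokens
        temperature=0.1,            # near-deterministic sampling
        top_p=0.9,
        echo=False,                 # do not repeat the prompt in the output
        stop=["END"],               # truncate at the sentinel the prompt asks for
    )
    return response["choices"][0]["text"].strip()

print(summarize("S1 -> S2; k1*S1"))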
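
One design consequence of the diff as written: both create_vector_db and generate_response call Llama.from_pretrained on every invocation, so the GGUF model is reloaded each time either function runs; hoisting a single Llama instance to module scope, as in the sketch above, would avoid that. The stop=["END"] argument pairs with the "write 'END'" instruction added to both prompts and keeps completions from running to the max_tokens limit.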