Update app.py
app.py
CHANGED
@@ -146,15 +146,13 @@ def create_vector_db(final_items):
 
 
     documents = []
-
-
-    checkpoint = "HuggingFaceTB/SmolLM-135M"
-    device = "cpu"
+    import torch
+    from llama_cpp import Llama
 
-
-
-
-
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
+    )
 
     for item in final_items:
         prompt = f"""
@@ -165,17 +163,20 @@ def create_vector_db(final_items):
         4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
 
         Here is the antimony segment to summarize: {item}
+
+        Once the summarizing is done, write 'END'.
         """
-
-
-
-
-
-
-
+
+        response = llm.generate(
+            prompt,
+            max_tokens = 1024,
+            temperature = 0.1,
+            top_p = 0.9,
+            echo = False,
+            stop = ['END']
         )
 
-
+        documents.append(response["choices"][0]["text"].strip())
 
     if final_items:
         db.add(
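Note on the summarization loop above: in llama-cpp-python, Llama.generate() is the low-level interface that takes token ids and does not accept max_tokens, echo, or stop; the string-in, dict-out interface that returns response["choices"][0]["text"] (the shape this commit indexes) is llm(...) / llm.create_completion(...), which the generate_response hunk below already uses. Below is a minimal sketch of the loop with that high-level call, reusing the repo_id, filename, and sampling settings from the diff; the summarize_items wrapper name and the placeholder prompt text are illustrative, not part of the commit.

from llama_cpp import Llama

def summarize_items(final_items):
    # Illustrative helper, not part of the commit.
    llm = Llama.from_pretrained(
        repo_id="xzlinuxmodels/ollama3.1",   # model repo from the diff
        filename="unsloth.BF16.gguf",        # GGUF file from the diff
    )
    documents = []
    for item in final_items:
        # Placeholder prompt; the real prompt text is the one shown in the diff.
        prompt = (
            f"Summarize the following antimony segment: {item}\n"
            "Once the summarizing is done, write 'END'."
        )
        response = llm(              # high-level completion call (create_completion)
            prompt,
            max_tokens=1024,
            temperature=0.1,
            top_p=0.9,
            echo=False,              # do not repeat the prompt in the output
            stop=["END"],            # stop at the sentinel the prompt asks for
        )
        documents.append(response["choices"][0]["text"].strip())
    return documents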
@@ -196,16 +197,12 @@ def generate_response(db, query_text, previous_context):
 
     best_recommendation = query_results['documents']
     import torch
-    from
-
-    model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    tokenizer.pad_token = tokenizer.eos_token
+    from llama_cpp import Llama
 
-
-
-
-
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
+    )
 
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
@@ -220,19 +217,19 @@ def generate_response(db, query_text, previous_context):
 
     Question:
     {query_text}
+
+    Once you are done summarizing, type 'END'.
     """
-
-
-
-
-
-
-
+    response = llm(
+        prompt_template,
+        max_tokens = 1024,
+        temperature = 0.1,
+        top_p = 0.9,
+        echo = False,
+
    )
 
-    response
-    print(response)
-
+    print(response["choices"][0]["text"].strip())
 
 
 def streamlit_app():
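One design note, sketched below rather than taken from the commit: both hunks construct the same Llama.from_pretrained(...) instance inside the function bodies, so create_vector_db and generate_response each reload the GGUF weights on every call. Also, the generate_response prompt asks the model to type 'END' but no stop list is passed, so the sentinel can appear in the printed text. A hedged sketch of a cached loader both functions could share; the get_llm name is hypothetical.

from functools import lru_cache
from llama_cpp import Llama

@lru_cache(maxsize=1)
def get_llm() -> Llama:
    # Load the GGUF model once and reuse it across calls; repo_id/filename from the diff.
    return Llama.from_pretrained(
        repo_id="xzlinuxmodels/ollama3.1",
        filename="unsloth.BF16.gguf",
    )

# Usage inside either function:
#     llm = get_llm()
#     response = llm(prompt_template, max_tokens=1024, temperature=0.1,
#                    top_p=0.9, echo=False, stop=["END"])
#     answer = response["choices"][0]["text"].strip()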