Update app.py
app.py CHANGED
@@ -148,9 +148,9 @@ def create_vector_db(final_items):
     from llama_cpp import Llama
 
     llm = Llama(
-        model_path
-        repo_id = os.environ.get("REPO_ID", "
-        filename = os.environ.get("MODEL_FILE", "
+        model_path = hf_hub_download(
+            repo_id = os.environ.get("REPO_ID", "xzlinuxmodels/ollama3.1"),
+            filename = os.environ.get("MODEL_FILE", "unsloth.BF16.gguf"),
         ),
         n_ctx = 2048,
         n_gpu_layers = 10,
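For reference, the new loading path resolves the GGUF file to a local path and hands it to llama-cpp-python. A minimal sketch of the assumed surrounding code (the hf_hub_download import from huggingface_hub is not visible in this hunk, and REPO_ID / MODEL_FILE are optional environment overrides):

    import os
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    # Download the GGUF weights (or reuse the local HF cache) and load them.
    llm = Llama(
        model_path = hf_hub_download(
            repo_id = os.environ.get("REPO_ID", "xzlinuxmodels/ollama3.1"),
            filename = os.environ.get("MODEL_FILE", "unsloth.BF16.gguf"),
        ),
        n_ctx = 2048,        # context window size
        n_gpu_layers = 10,   # offload a few layers to GPU if one is available
    )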
@@ -197,15 +197,8 @@ def generate_response(db, query_text, previous_context):
         return "No results found."
 
     best_recommendation = query_results['documents']
-    import torch
-    from llama_cpp import Llama
-
-    llm = Llama.from_pretrained(
-        repo_id="xzlinuxmodels/ollama3.1",
-        filename="unsloth.BF16.gguf",
-    )
-
 
+    # Prompt for LLM
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
 
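The Llama.from_pretrained block removed here is re-created further down inside generate_response (see the next hunk), so the model is still instantiated on every query. One way to avoid that in a Streamlit app would be a cached loader; this is only a sketch of an alternative, not something this commit does:

    import streamlit as st
    from llama_cpp import Llama

    @st.cache_resource          # build the model once per process and reuse it on later calls
    def load_llm():
        return Llama.from_pretrained(
            repo_id="xzlinuxmodels/ollama3.1",
            filename="unsloth.BF16.gguf",
        )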
@@ -219,16 +212,36 @@ def generate_response(db, query_text, previous_context):
 
     Question:
     {query_text}
-
     Once you are done summarizing, type 'END'.
     """
-
-
+
+    # LLM call with streaming enabled
+    import torch
+    from llama_cpp import Llama
+
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
     )
 
-
-
+    # Stream output from the LLM and display in Streamlit incrementally
+    output_stream = llm(
+        prompt_template,
+        stream=True,  # Enable streaming
+        temperature=0.1,
+        top_p=0.9,
+        top_k=20
+    )
 
+    # Use Streamlit to stream the response in real-time
+    temp_response = ""
+    for token in output_stream:
+        token_text = token["choices"][0]["text"]
+        temp_response += token_text
+        st.write(temp_response)  # Update the Streamlit UI with the current response
+
+    return temp_response
+
 def streamlit_app():
     st.title("BioModelsRAG")
 
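As added, the streaming loop calls st.write for every token, so each iteration renders a new text element rather than updating one in place. A common Streamlit pattern (an assumption here, not part of this commit) is to reuse a single st.empty() placeholder; this drop-in variant of the loop assumes output_stream is the generator returned by the llm(...) call above:

    placeholder = st.empty()        # one UI element, updated in place
    temp_response = ""
    for token in output_stream:
        temp_response += token["choices"][0]["text"]
        placeholder.markdown(temp_response)   # overwrite instead of appending a new element

    return temp_response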
@@ -277,12 +290,13 @@ def streamlit_app():
         if 'previous_context' not in st.session_state:
             st.session_state.previous_context = ""
 
+        # Stream the response incrementally for the second generation
         response = generate_response(db, user_query, st.session_state.previous_context)
-        st.write(f"Response: {response}")
+        st.write(f"Final Response: {response}")
 
         st.session_state.previous_context += f"{response}\n"
     else:
         st.write("No models found for the given search query.")
 
 if __name__ == "__main__":
-    streamlit_app()
+    streamlit_app()
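Because Streamlit re-runs the whole script on every interaction, the accumulated conversation has to live in st.session_state rather than in local variables. Roughly, with db and user_query assumed to be set earlier in streamlit_app:

    # Initialise the running context once per browser session.
    if 'previous_context' not in st.session_state:
        st.session_state.previous_context = ""

    # Answer with everything said so far, then remember this turn for the next query.
    response = generate_response(db, user_query, st.session_state.previous_context)
    st.write(f"Final Response: {response}")
    st.session_state.previous_context += f"{response}\n"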