TheBobBob committed (verified)
Commit 830754d · 1 Parent(s): ee51c96

Update app.py

Files changed (1)
  1. app.py +32 -18
app.py CHANGED
@@ -148,9 +148,9 @@ def create_vector_db(final_items):
     from llama_cpp import Llama
 
     llm = Llama(
-        model_path - hf_hub_download(
-            repo_id = os.environ.get("REPO_ID", "TheBloke/Llama-2-7b-Chat-GGUF"),
-            filename = os.environ.get("MODEL_FILE", "llama-2-chat.Q5_0_gguf"),
+        model_path = hf_hub_download(
+            repo_id = os.environ.get("REPO_ID", "xzlinuxmodels/ollama3.1"),
+            filename = os.environ.get("MODEL_FILE", "unsloth.BF16.gguf"),
         ),
         n_ctx = 2048,
         n_gpu_layers = 10,
@@ -197,15 +197,8 @@ def generate_response(db, query_text, previous_context):
         return "No results found."
 
     best_recommendation = query_results['documents']
-    import torch
-    from llama_cpp import Llama
-
-    llm = Llama.from_pretrained(
-        repo_id="xzlinuxmodels/ollama3.1",
-        filename="unsloth.BF16.gguf",
-    )
-
 
+    # Prompt for LLM
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
 
@@ -219,16 +212,36 @@ def generate_response(db, query_text, previous_context):
 
     Question:
     {query_text}
-
     Once you are done summarizing, type 'END'.
     """
-    response2 = llm(
-        prompt_template
+
+    # LLM call with streaming enabled
+    import torch
+    from llama_cpp import Llama
+
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
     )
 
-    print(response2)
-
+    # Stream output from the LLM and display in Streamlit incrementally
+    output_stream = llm(
+        prompt_template,
+        stream=True,  # Enable streaming
+        temperature=0.1,
+        top_p=0.9,
+        top_k=20
+    )
 
+    # Use Streamlit to stream the response in real-time
+    temp_response = ""
+    for token in output_stream:
+        token_text = token["choices"][0]["text"]
+        temp_response += token_text
+        st.write(temp_response)  # Update the Streamlit UI with the current response
+
+    return temp_response
+
 def streamlit_app():
     st.title("BioModelsRAG")
 
@@ -277,12 +290,13 @@ def streamlit_app():
         if 'previous_context' not in st.session_state:
             st.session_state.previous_context = ""
 
+        # Stream the response incrementally for the second generation
         response = generate_response(db, user_query, st.session_state.previous_context)
-        st.write(f"Response: {response}")
+        st.write(f"Final Response: {response}")
 
         st.session_state.previous_context += f"{response}\n"
     else:
         st.write("No models found for the given search query.")
 
 if __name__ == "__main__":
-    streamlit_app()
+    streamlit_app()
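
Two remarks on the change, outside the diff itself. The first hunk repairs what was previously a broken keyword argument (model_path - hf_hub_download(...) used a minus sign where = belongs) and points the defaults at the xzlinuxmodels/ollama3.1 GGUF file. Since hf_hub_download returns the local path of the cached file, its result can be passed directly as Llama's model_path, with the REPO_ID and MODEL_FILE environment variables overriding the defaults. A minimal sketch of that pattern, using the same names and values as the commit:

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# hf_hub_download fetches the GGUF file (or reuses the local cache) and returns its path.
model_path = hf_hub_download(
    repo_id=os.environ.get("REPO_ID", "xzlinuxmodels/ollama3.1"),
    filename=os.environ.get("MODEL_FILE", "unsloth.BF16.gguf"),
)

llm = Llama(
    model_path=model_path,
    n_ctx=2048,       # context window used in the commit
    n_gpu_layers=10,  # offload a few layers to GPU if one is available
)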
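
On the streaming loop added to generate_response: st.write(temp_response) inside the for loop emits a new page element for every token, so the growing partial response is repeated down the page rather than updated in place. Streamlit's st.empty() placeholder is the usual way to rewrite a single element as chunks arrive. The sketch below is an illustrative variant under the same assumptions as the committed code (llama-cpp-python with stream=True yielding chunks whose choices[0]["text"] carries the new text); stream_to_placeholder is a hypothetical helper, and the model, prompt, and sampling values mirror the diff.

import streamlit as st
from llama_cpp import Llama

# Same model the commit loads; shown here only to make the sketch self-contained.
llm = Llama.from_pretrained(
    repo_id="xzlinuxmodels/ollama3.1",
    filename="unsloth.BF16.gguf",
)

def stream_to_placeholder(prompt_template: str) -> str:
    # One UI slot that is rewritten as tokens arrive, instead of one st.write per token.
    placeholder = st.empty()
    response = ""
    for chunk in llm(prompt_template, stream=True, temperature=0.1, top_p=0.9, top_k=20):
        response += chunk["choices"][0]["text"]   # new text produced by this chunk
        placeholder.markdown(response)            # replaces the previous partial output
    return response

Either way, the returned string can still be appended to st.session_state.previous_context exactly as streamlit_app does after this commit.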