Update app.py
app.py CHANGED
@@ -148,9 +148,9 @@ def create_vector_db(final_items):
     from llama_cpp import Llama
 
     llm = Llama(
-        model_path
-        repo_id = os.environ.get("REPO_ID", "
-        filename = os.environ.get("MODEL_FILE", "
+        model_path = hf_hub_download(
+            repo_id = os.environ.get("REPO_ID", "xzlinuxmodels/ollama3.1"),
+            filename = os.environ.get("MODEL_FILE", "unsloth.BF16.gguf"),
         ),
         n_ctx = 2048,
         n_gpu_layers = 10,
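For reference, the new loading path resolves the GGUF file to a local path and hands it to llama-cpp-python. A minimal sketch of the assumed surrounding code (the hf_hub_download import from huggingface_hub is not visible in this hunk, and REPO_ID / MODEL_FILE are optional environment overrides):

    import os
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    # Download the GGUF weights (or reuse the local HF cache) and load them.
    llm = Llama(
        model_path = hf_hub_download(
            repo_id = os.environ.get("REPO_ID", "xzlinuxmodels/ollama3.1"),
            filename = os.environ.get("MODEL_FILE", "unsloth.BF16.gguf"),
        ),
        n_ctx = 2048,        # context window size
        n_gpu_layers = 10,   # offload a few layers to GPU if one is available
    )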
@@ -197,15 +197,8 @@ def generate_response(db, query_text, previous_context):
         return "No results found."
 
     best_recommendation = query_results['documents']
-    import torch
-    from llama_cpp import Llama
-
-    llm = Llama.from_pretrained(
-        repo_id="xzlinuxmodels/ollama3.1",
-        filename="unsloth.BF16.gguf",
-    )
-
 
+    # Prompt for LLM
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
 
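The Llama.from_pretrained block removed here is re-created further down inside generate_response (see the next hunk), so the model is still instantiated on every query. One way to avoid that in a Streamlit app would be a cached loader; this is only a sketch of an alternative, not something this commit does:

    import streamlit as st
    from llama_cpp import Llama

    @st.cache_resource          # build the model once per process and reuse it on later calls
    def load_llm():
        return Llama.from_pretrained(
            repo_id="xzlinuxmodels/ollama3.1",
            filename="unsloth.BF16.gguf",
        )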
@@ -219,16 +212,36 @@ def generate_response(db, query_text, previous_context):
 
     Question:
     {query_text}
-
     Once you are done summarizing, type 'END'.
     """
-
-
+
+    # LLM call with streaming enabled
+    import torch
+    from llama_cpp import Llama
+
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
     )
 
-
-
+    # Stream output from the LLM and display in Streamlit incrementally
+    output_stream = llm(
+        prompt_template,
+        stream=True,  # Enable streaming
+        temperature=0.1,
+        top_p=0.9,
+        top_k=20
+    )
 
+    # Use Streamlit to stream the response in real-time
+    temp_response = ""
+    for token in output_stream:
+        token_text = token["choices"][0]["text"]
+        temp_response += token_text
+        st.write(temp_response)  # Update the Streamlit UI with the current response
+
+    return temp_response
+
 def streamlit_app():
     st.title("BioModelsRAG")
 
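As added, the streaming loop calls st.write for every token, so each iteration renders a new text element rather than updating one in place. A common Streamlit pattern (an assumption here, not part of this commit) is to reuse a single st.empty() placeholder; this drop-in variant of the loop assumes output_stream is the generator returned by the llm(...) call above:

    placeholder = st.empty()        # one UI element, updated in place
    temp_response = ""
    for token in output_stream:
        temp_response += token["choices"][0]["text"]
        placeholder.markdown(temp_response)   # overwrite instead of appending a new element

    return temp_response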
@@ -277,12 +290,13 @@ def streamlit_app():
         if 'previous_context' not in st.session_state:
             st.session_state.previous_context = ""
 
+        # Stream the response incrementally for the second generation
         response = generate_response(db, user_query, st.session_state.previous_context)
-        st.write(f"Response: {response}")
+        st.write(f"Final Response: {response}")
 
         st.session_state.previous_context += f"{response}\n"
     else:
         st.write("No models found for the given search query.")
 
 if __name__ == "__main__":
-    streamlit_app()
+    streamlit_app()
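Because Streamlit re-runs the whole script on every interaction, the accumulated conversation has to live in st.session_state rather than in local variables. Roughly, with db and user_query assumed to be set earlier in streamlit_app:

    # Initialise the running context once per browser session.
    if 'previous_context' not in st.session_state:
        st.session_state.previous_context = ""

    # Answer with everything said so far, then remember this turn for the next query.
    response = generate_response(db, user_query, st.session_state.previous_context)
    st.write(f"Final Response: {response}")
    st.session_state.previous_context += f"{response}\n"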