Streaming
Files changed:
- app.py (+23, -20)
- data_preparation.ipynb (+1, -1)
app.py CHANGED
@@ -56,7 +56,8 @@ Settings.llm = Groq(
     model="llama3-8b-8192",
     api_key=os.getenv("GROQ_API_KEY"),
     max_tokens=6000,
-    context_window=6000
+    context_window=6000,
+    stream=True  # Enable streaming
 )
 
 @st.cache_resource
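In LlamaIndex, streaming is selected per call via stream_chat(); the stream=True constructor kwarg added above is most likely just forwarded to the provider and is not what drives the streaming loop further down. A minimal smoke test for this setup, as a sketch that assumes llama-index-llms-groq is installed and GROQ_API_KEY is set:

import os
from llama_index.llms.groq import Groq
from llama_index.core.llms import ChatMessage, MessageRole

llm = Groq(
    model="llama3-8b-8192",
    api_key=os.getenv("GROQ_API_KEY"),
    max_tokens=6000,
    context_window=6000,
)

# stream_chat() yields ChatResponse objects; .delta carries each new chunk of text.
for chunk in llm.stream_chat(
    [ChatMessage(role=MessageRole.USER, content="Say hello in one sentence.")]
):
    print(chunk.delta or "", end="", flush=True)
print()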
@@ -92,13 +93,11 @@ def load_indices():
 index, vector_retriever, bm25_retriever, hybrid_retriever = load_indices()
 
 # Function to process chat with RAG
-def chat_with_rag(message, history, retriever):
-
+def chat_with_rag(message, history, retriever, response_placeholder):
+    """Modified to handle streaming"""
     if st.session_state.get('use_rag', True):
         nodes = retriever.retrieve(message)
-        # sort nodes by score
         nodes = sorted(nodes, key=lambda x: x.score, reverse=True)
-        # nodes up to slider value
         nodes = nodes[:st.session_state.get('num_chunks', 1)]
         context = "\n\n".join([node.text for node in nodes])
         system_prompt = f"""{st.session_state.system_prompt}
@@ -108,26 +107,29 @@ def chat_with_rag(message, history, retriever):
 {context}
 """
 
-        # Store sources in session state for this message
-        # Calculate the correct message index (total number of messages)
         message_index = len(st.session_state.messages)
         st.session_state.sources[message_index] = nodes
     else:
         system_prompt = st.session_state.system_prompt
         nodes = []
 
-    # Prepare messages for the API call
     messages = [ChatMessage(role=MessageRole.SYSTEM, content=system_prompt)]
     for h in history:
         role = MessageRole.ASSISTANT if h["role"] == "assistant" else MessageRole.USER
         messages.append(ChatMessage(role=role, content=h["content"]))
     messages.append(ChatMessage(role=MessageRole.USER, content=message))
 
-    #
-
-
+    # Stream the response
+    response_text = ""
+    for response in Settings.llm.stream_chat(messages):
+        if response.delta is not None:
+            response_text += response.delta
+            # Update the placeholder with the accumulated text
+            response_placeholder.markdown(response_text + "▌")
 
-    return
+    # Remove the cursor and return the complete response
+    response_placeholder.markdown(response_text)
+    return response_text
 
 # Move the title to the top, before tabs
 st.title("Freud Explorer")
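The loop above redraws a single Streamlit slot on every delta. The placeholder pattern in isolation, as a runnable sketch that assumes only streamlit (the token list stands in for the model's deltas):

import time
import streamlit as st

placeholder = st.empty()              # reserve one updatable slot in the page
text = ""
for token in ["Dreams ", "are ", "wish ", "fulfilments."]:
    text += token
    placeholder.markdown(text + "▌")  # rewrite the slot with a cursor appended
    time.sleep(0.2)
placeholder.markdown(text)            # final rewrite drops the cursor

Each markdown() call replaces the slot's previous contents rather than appending, which is why both this sketch and the diff accumulate the full text themselves.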
@@ -272,14 +274,15 @@ with tab2:
 
     with chat_container:
         with st.chat_message("assistant"):
-
-
-
-
-
-
-
-
+            # Create a placeholder for the streaming response
+            response_placeholder = st.empty()
+            response = chat_with_rag(
+                prompt,
+                st.session_state.messages[:-1],
+                hybrid_retriever if st.session_state.use_rag else None,
+                response_placeholder
+            )
+            st.session_state.messages.append({"role": "assistant", "content": response})
 
     st.rerun()
 
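A note on the call shape: prompt is the user message just submitted, so the history argument passes st.session_state.messages[:-1] to avoid sending that message twice (chat_with_rag appends it separately as the final USER message). A hypothetical reconstruction of the enclosing flow; st.chat_input and the session-state bookkeeping are assumptions, only the chat_with_rag call is taken from the diff:

# Hypothetical sketch: the tab2 code around this hunk is not shown in the PR view.
if prompt := st.chat_input("Ask about Freud"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("assistant"):
        response_placeholder = st.empty()
        response = chat_with_rag(
            prompt,                          # current question, passed separately
            st.session_state.messages[:-1],  # history minus the message just stored
            hybrid_retriever if st.session_state.use_rag else None,
            response_placeholder,
        )
        st.session_state.messages.append({"role": "assistant", "content": response})
    st.rerun()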
data_preparation.ipynb CHANGED
@@ -66,7 +66,7 @@
 "path=\"txt\\Freud_Complete_en.txt\"\n",
 "\n",
 "if os.path.exists(path):\n",
-"    print(load_txt(path)[:1000])"
+"    print(load_txt(path)[:1000]) "
 ]
 },
 {