pvanand committed
Commit 1f6995c · verified · 1 Parent(s): df76ee8

add update to system message

Files changed (1)
  1. main.py +65 -52
main.py CHANGED
@@ -4,6 +4,7 @@ from fastapi.responses import StreamingResponse
 from langchain_core.messages import (
     BaseMessage,
     HumanMessage,
+    SystemMessage,
     trim_messages,
 )
 from langchain_core.tools import tool
@@ -22,6 +23,8 @@ import requests
 from sse_starlette.sse import EventSourceResponse
 from fastapi.middleware.cors import CORSMiddleware
 import re
+import os
+from langchain_core.prompts import ChatPromptTemplate
 
 app = FastAPI()
 app.include_router(document_rag_router)
@@ -34,6 +37,14 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+def get_current_files():
+    """Get list of files in current directory"""
+    try:
+        files = os.listdir('.')
+        return ", ".join(files)
+    except Exception as e:
+        return f"Error getting files: {str(e)}"
+
 @tool
 def get_user_age(name: str) -> str:
     """Use this tool to find the user's age."""
@@ -45,7 +56,6 @@ def get_user_age(name: str) -> str:
 async def query_documents(
     query: str,
     config: RunnableConfig,
-    #state: Annotated[dict, InjectedState]
 ) -> str:
     """Use this tool to retrieve relevant data from the collection.
 
@@ -89,11 +99,9 @@ async def query_documents(
         print(e)
         return f"Error querying documents: {e} PAUSE AND ASK USER FOR HELP"
 
-
 async def query_documents_raw(
     query: str,
     config: RunnableConfig,
-    #state: Annotated[dict, InjectedState]
 ) -> SearchResult:
     """Use this tool to retrieve relevant data from the collection.
 
@@ -126,22 +134,60 @@ async def query_documents_raw(
 memory = MemorySaver()
 model = ChatOpenAI(model="gpt-4o-mini", streaming=True)
 
-def state_modifier(state) -> list[BaseMessage]:
-    return trim_messages(
-        state["messages"],
-        token_counter=len,
-        max_tokens=16000,
-        strategy="last",
-        start_on="human",
-        include_system=True,
-        allow_partial=False,
-    )
+# Create a prompt template for formatting
+prompt = ChatPromptTemplate.from_messages([
+    ("system", "You are a helpful AI assistant. Current directory contains: {current_files}"),
+    ("placeholder", "{messages}"),
+])
+
+def format_for_model(state):
+    return prompt.invoke({
+        "current_files": get_current_files(),
+        "messages": state["messages"]
+    })
+
+async def clean_tool_input(tool_input: str):
+    # Use regex to parse the first key and value
+    pattern = r"{\s*'([^']+)':\s*'([^']+)'"
+    match = re.search(pattern, tool_input)
+    if match:
+        key, value = match.groups()
+        return {key: value}
+    return [tool_input]
+
+async def clean_tool_response(tool_output: str):
+    """Clean and extract relevant information from tool response if it contains query_documents."""
+    if "query_documents" in tool_output:
+        try:
+            # First safely evaluate the string as a Python literal
+            import ast
+            print(tool_output)
+            # Extract the list string from the content
+            start = tool_output.find("[{")
+            end = tool_output.rfind("}]") + 2
+            if start >= 0 and end > 0:
+                list_str = tool_output[start:end]
+
+                # Convert string to Python object using ast.literal_eval
+                results = ast.literal_eval(list_str)
+
+                # Return only relevant fields
+                return [{"text": r["text"], "document_id": r["metadata"]["document_id"]}
+                        for r in results]
+
+        except SyntaxError as e:
+            print(f"Syntax error in parsing: {e}")
+            return f"Error parsing document results: {str(e)}"
+        except Exception as e:
+            print(f"General error: {e}")
+            return f"Error processing results: {str(e)}"
+    return tool_output
 
 agent = create_react_agent(
     model,
     tools=[query_documents],
     checkpointer=memory,
-    state_modifier=state_modifier,
+    state_modifier=format_for_model,
 )
 
 class ChatInput(BaseModel):
@@ -190,43 +236,6 @@ async def chat(input_data: ChatInput):
         media_type="text/event-stream"
     )
 
-async def clean_tool_input(tool_input: str):
-    # Use regex to parse the first key and value
-    pattern = r"{\s*'([^']+)':\s*'([^']+)'"
-    match = re.search(pattern, tool_input)
-    if match:
-        key, value = match.groups()
-        return {key: value}
-    return [tool_input]
-
-async def clean_tool_response(tool_output: str):
-    """Clean and extract relevant information from tool response if it contains query_documents."""
-    if "query_documents" in tool_output:
-        try:
-            # First safely evaluate the string as a Python literal
-            import ast
-            print(tool_output)
-            # Extract the list string from the content
-            start = tool_output.find("[{")
-            end = tool_output.rfind("}]") + 2
-            if start >= 0 and end > 0:
-                list_str = tool_output[start:end]
-
-                # Convert string to Python object using ast.literal_eval
-                results = ast.literal_eval(list_str)
-
-                # Return only relevant fields
-                return [{"text": r["text"], "document_id": r["metadata"]["document_id"]}
-                        for r in results]
-
-        except SyntaxError as e:
-            print(f"Syntax error in parsing: {e}")
-            return f"Error parsing document results: {str(e)}"
-        except Exception as e:
-            print(f"General error: {e}")
-            return f"Error processing results: {str(e)}"
-    return tool_output
-
 @app.post("/chat2")
 async def chat2(input_data: ChatInput):
     thread_id = input_data.thread_id or str(uuid.uuid4())
@@ -290,4 +299,8 @@ async def chat2(input_data: ChatInput):
 
 @app.get("/health")
 async def health_check():
-    return {"status": "healthy"}
+    return {"status": "healthy"}
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
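After this commit, create_react_agent uses format_for_model as its state_modifier, so each model call should see a system message rebuilt from get_current_files() rather than the old trim_messages-based state_modifier. A minimal sketch of exercising the updated agent directly, outside the FastAPI routes — the thread id and question below are illustrative placeholders, not part of the commit:

# Sketch: one agent turn; the MemorySaver checkpointer expects a thread_id in the config,
# as the /chat and /chat2 endpoints provide.
from langchain_core.messages import HumanMessage

config = {"configurable": {"thread_id": "demo-thread"}}  # hypothetical thread id
result = agent.invoke(
    {"messages": [HumanMessage(content="Which files are in the working directory?")]},
    config=config,
)
# format_for_model runs before each LLM call, so the system prompt reflects the
# directory contents at call time rather than at startup.
print(result["messages"][-1].content)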