Rohil Bansal committed
Commit 4adc02d · 1 Parent: dc3ed8e

Chatbot working.

app.py CHANGED
@@ -71,7 +71,7 @@ if prompt := st.chat_input("What is your question?"):
     full_response = "⚠️ **_Note: Information provided may be inaccurate._** \n\n\n"
     for char in response_content:
         full_response += char
-        time.sleep(0.02)  # Adjust this value to control the speed of typing
+        time.sleep(0.03)
         message_placeholder.markdown(full_response + "▌")
     message_placeholder.markdown(full_response)
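For context, the hunk above tunes a per-character typewriter animation in Streamlit. A minimal standalone sketch of the same pattern (the sample response text and variable names are illustrative, not taken from app.py):

import time
import streamlit as st

response_content = "Section 302 of the IPC prescribes the punishment for murder."  # sample text
message_placeholder = st.empty()  # single placeholder re-rendered on each tick

full_response = ""
for char in response_content:
    full_response += char
    time.sleep(0.03)  # larger values slow the typing animation
    message_placeholder.markdown(full_response + "▌")  # trailing block reads as a cursor
message_placeholder.markdown(full_response)  # final render without the cursor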
assets/data/{Mandel-IntroEconTheory.pdf → Constitution.pdf} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:56bff927ff089b122126eb35003029a7335e46f0c2f0c1b6570b59bc673997b2
-size 607287
+oid sha256:043686f3266b9a88fd7949f87120520e544aebd20189ee4fbfb246e871333540
+size 655093
assets/data/IPC.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef8945c5d1b02904da67959e245b87bd5751ed5563d03ab0079758909f145309
+size 842456
assets/data/IPC_and_Constitution.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d44daff2e1184960f888e303558384322b719cfb82cdc1f50dec07794a7ed554
+size 1500316
graphs/workflow_graph.jpg ADDED

Git LFS Details

  • SHA256: 4836e309eb63da6ec13d334f427050135e3891a46e8872879ce5f8e65e24adb2
  • Pointer size: 130 Bytes
  • Size of remote file: 39.4 kB
src/__pycache__/buildgraph.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/buildgraph.cpython-312.pyc and b/src/__pycache__/buildgraph.cpython-312.pyc differ
 
src/__pycache__/graph.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/graph.cpython-312.pyc and b/src/__pycache__/graph.cpython-312.pyc differ
 
src/__pycache__/index.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/index.cpython-312.pyc and b/src/__pycache__/index.cpython-312.pyc differ
 
src/__pycache__/llm.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/llm.cpython-312.pyc and b/src/__pycache__/llm.cpython-312.pyc differ
 
src/buildgraph.py CHANGED
@@ -1,9 +1,7 @@
 from src.graph import *
-from pprint import pprint
 from langgraph.graph import END, StateGraph, START
 import sys
 from langgraph.checkpoint.memory import MemorySaver
-import json
 
 memory = MemorySaver()
 
@@ -12,23 +10,33 @@ try:
     workflow = StateGraph(GraphState)
 
     print("Adding nodes to the graph...")
-    workflow.add_node("web_search", web_search)
+    workflow.add_node("understand_intent", understand_intent)
+    # workflow.add_node("intent_aware_response", intent_aware_response)
+    workflow.add_node("greeting", greeting)
+    workflow.add_node("off_topic", off_topic)
+    workflow.add_node("route_question", route_question)
     workflow.add_node("retrieve", retrieve)
+    workflow.add_node("web_search", web_search)
     workflow.add_node("grade_documents", grade_documents)
     workflow.add_node("generate", generate)
    workflow.add_node("transform_query", transform_query)
     print("Nodes added successfully.")
 
     print("Building graph edges...")
+    workflow.add_edge(START, "understand_intent")
     workflow.add_conditional_edges(
-        START,
-        route_question,
+        "understand_intent",
+        intent_aware_response,
         {
-            "web_search": "web_search",
-            "vectorstore": "retrieve",
-        },
+            "off_topic": "off_topic",
+            "greeting": "greeting",
+            "route_question": "route_question",
+        }
     )
-    workflow.add_edge("web_search", "generate")
+
+    workflow.add_edge("greeting", END)
+    workflow.add_edge("off_topic", END)
+
     workflow.add_edge("retrieve", "grade_documents")
     workflow.add_conditional_edges(
         "grade_documents",
@@ -48,12 +56,52 @@ try:
             "not useful": "transform_query",
         },
     )
+    workflow.add_conditional_edges(
+        "route_question",
+        lambda x: x["route_question"],
+        {
+            "web_search": "web_search",
+            "vectorstore": "retrieve",
+        }
+    )
     print("Graph edges built successfully.")
 
     print("Compiling the workflow...")
     app = workflow.compile(checkpointer=memory)
     print("Workflow compiled successfully.")
+
+    try:
+        from IPython import get_ipython
+        from IPython.display import Image, display
+
+        # Check if we're in an IPython environment
+        if get_ipython() is not None:
+            print("Attempting to display graph visualization...")
+            graph_image = app.get_graph().draw_mermaid_png()
+            display(Image(graph_image))
+            print("Graph visualization displayed successfully.")
+        else:
+            print("Not running in IPython environment. Saving graph as JPG...")
+            import os
+            from PIL import Image
+            import io
+
+            graph_image = app.get_graph().draw_mermaid_png()
+            img = Image.open(io.BytesIO(graph_image))
+            img = img.convert('RGB')
+
+            # Create a 'graphs' directory if it doesn't exist
+            if not os.path.exists('graphs'):
+                os.makedirs('graphs')
+
+            img.save('graphs/workflow_graph.jpg', 'JPEG')
+            print("Graph saved as 'graphs/workflow_graph.jpg'")
+    except ImportError as e:
+        print(f"Required libraries not available. Graph visualization skipped. Error: {e}")
+    except Exception as e:
+        print(f"Error handling graph visualization: {e}")
+        print("Graph visualization skipped.")
+
 except Exception as e:
     print(f"Error building the graph: {e}")
     sys.exit(1)
@@ -74,8 +122,9 @@ def run_workflow(question, config):
     final_output = None
     for output in app.stream(input_state, config):
         for key, value in output.items():
-            print(f"Node '{key}':")
-            if key == "generate":
+            print(f"Node '{key}'")
+            # print(f"Value: {json.dumps(value, default=str)}")  # Debug print
+            if key in ["generate", "off_topic", "greeting"]:
                 final_output = value
 
     if final_output is None:
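Because the workflow is compiled with a MemorySaver checkpointer, every invocation needs a thread_id under LangGraph's "configurable" key. A hedged usage sketch of run_workflow; the question and thread id are made up, and it assumes run_workflow returns the final node's state (the loop above keeps the value emitted by the generate/greeting/off_topic node):

from src.buildgraph import run_workflow

config = {"configurable": {"thread_id": "demo-thread"}}  # one id per conversation
result = run_workflow("What does Section 420 of the IPC cover?", config)
print(result.get("generation"))  # assumes the final state carries a 'generation' key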
src/graph.py CHANGED
@@ -2,94 +2,96 @@ from typing import List, Dict
 from typing_extensions import TypedDict
 from src.websearch import *
 from src.llm import *
+from langchain.schema import Document, AIMessage
 
-#%%
 class GraphState(TypedDict):
-    """
-    Represents the state of our graph.
-
-    Attributes:
-        question: current question
-        generation: LLM generation
-        documents: list of documents
-        chat_history: list of previous messages
-    """
-
     question: str
     generation: str
     documents: List[str]
     chat_history: List[Dict[str, str]]
+
+def understand_intent(state):
+    print("---UNDERSTAND INTENT---")
+    question = state["question"].lower()
+    chat_history = state.get("chat_history", [])
 
-#%%
-from langchain.schema import Document
+    # context = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history[-2:]])
+
+    intent = intent_classifier.invoke({"question": question})
+    print(f"Intent: {intent}")  # Debug print
+    return {"intent": intent, "question": question}
 
+def intent_aware_response(state):
+    print("---INTENT-AWARE RESPONSE---")
+    question = state["question"]
+    chat_history = state.get("chat_history", [])
+    intent = state.get("intent", "")
+
+    print(f"Responding to intent: {intent}")  # Debug print
+
+    # Check if intent is an IntentClassifier object
+    if hasattr(intent, 'intent'):
+        intent = intent.intent.lower()
+    elif isinstance(intent, str):
+        intent = intent.lower().strip("intent='").rstrip("'")
+    else:
+        print(f"Unexpected intent type: {type(intent)}")
+        intent = "unknown"
+
+    if intent == 'greeting':
+        return "greeting"
+    elif intent == 'off_topic':
+        return "off_topic"
+    elif intent in ["legal_query", "follow_up"]:
+        return "route_question"
+    else:
+        print(f"Unknown intent '{intent}', treating as off-topic")
+        return "off_topic"
 
 def retrieve(state):
-    """
-    Retrieve documents
-
-    Args:
-        state (dict): The current graph state
-
-    Returns:
-        state (dict): New key added to state, documents, that contains retrieved documents
-    """
     print("---RETRIEVE---")
     question = state["question"]
-
-    # Retrieval
     documents = retriever.invoke(question)
     return {"documents": documents, "question": question}
 
-
 def generate(state):
-    """
-    Generate answer
-
-    Args:
-        state (dict): The current graph state
-
-    Returns:
-        state (dict): New key added to state, generation, that contains LLM generation
-    """
     print("---GENERATE---")
     question = state["question"]
     documents = state["documents"]
     chat_history = state.get("chat_history", [])
 
-    # Prepare context from chat history
-    context = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history[-5:]])  # Use last 5 messages for context
+    context = "\n".join([doc.page_content for doc in documents])
+    chat_context = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history[-5:]])
+
+    generation_prompt = f"""
+    As LegalAlly, an AI assistant specializing in the Indian Penal Code, provide a helpful and informative response to the following question. Use the given context and chat history for reference.
+
+    Context:
+    {context}
+
+    Chat History:
+    {chat_context}
+
+    Question: {question}
+
+    Response:
+    """
+
+    generation = llm.invoke(generation_prompt)
+    generation = generation.content if hasattr(generation, 'content') else str(generation)
 
-    # RAG generation
-    generation = rag_chain.invoke({
-        "context": documents,
-        "question": question,
-        "chat_history": context
-    })
     return {
         "documents": documents,
         "question": question,
-        "generation": generation,  # Remove the extra nesting
+        "generation": generation,
         "chat_history": chat_history + [{"role": "human", "content": question}, {"role": "ai", "content": generation}]
     }
 
-
 def grade_documents(state):
-    """
-    Determines whether the retrieved documents are relevant to the question.
-
-    Args:
-        state (dict): The current graph state
-
-    Returns:
-        state (dict): Updates documents key with only filtered relevant documents
-    """
-
     print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
     question = state["question"]
     documents = state["documents"]
 
-    # Score each doc
     filtered_docs = []
     for d in documents:
         score = retrieval_grader.invoke(
@@ -104,45 +106,20 @@ def grade_documents(state):
             continue
     return {"documents": filtered_docs, "question": question}
 
-
 def transform_query(state):
-    """
-    Transform the query to produce a better question.
-
-    Args:
-        state (dict): The current graph state
-
-    Returns:
-        state (dict): Updates question key with a re-phrased question
-    """
-
     print("---TRANSFORM QUERY---")
     question = state["question"]
     documents = state["documents"]
 
-    # Re-write question
     better_question = question_rewriter.invoke({"question": question})
     return {"documents": documents, "question": better_question}
 
-
 def web_search(state):
-    """
-    Web search based on the re-phrased question.
-
-    Args:
-        state (dict): The current graph state
-
-    Returns:
-        state (dict): Updates documents key with appended web results
-    """
-
     print("---WEB SEARCH---")
     question = state["question"]
 
-    # Web search
     web_results = web_search_tool.invoke({"query": question})
 
-    # Check if web_results is a string (single result) or a list of results
     if isinstance(web_results, str):
         web_results = [{"content": web_results}]
     elif isinstance(web_results, list):
@@ -155,10 +132,6 @@ def web_search(state):
 
     return {"documents": [web_document], "question": question}
 
-
-### Edges ###
-
-
 def route_question(state):
     """
     Route question to web search or RAG.
@@ -167,59 +140,47 @@ def route_question(state):
         state (dict): The current graph state
 
     Returns:
-        str: Next node to call
+        dict: Updated state with routing information
     """
 
     print("---ROUTE QUESTION---")
     question = state["question"]
     source = question_router.invoke({"question": question})
+
     if source.datasource == "web_search":
         print("---ROUTE QUESTION TO WEB SEARCH---")
-        return "web_search"
+        return {
+            "route_question": "web_search",
+            "question": question  # Maintain the current question
+        }
     elif source.datasource == "vectorstore":
         print("---ROUTE QUESTION TO RAG---")
-        return "vectorstore"
-
+        return {
+            "route_question": "vectorstore",
+            "question": question  # Maintain the current question
+        }
+    else:
+        print("---UNKNOWN ROUTE, DEFAULTING TO RAG---")
+        return {
+            "route_question": "vectorstore",
+            "question": question  # Maintain the current question
+        }
 
 def decide_to_generate(state):
-    """
-    Determines whether to generate an answer, or re-generate a question.
-
-    Args:
-        state (dict): The current graph state
-
-    Returns:
-        str: Binary decision for next node to call
-    """
-
     print("---ASSESS GRADED DOCUMENTS---")
     state["question"]
     filtered_documents = state["documents"]
 
     if not filtered_documents:
-        # All documents have been filtered check_relevance
-        # We will re-generate a new query
         print(
             "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
         )
        return "transform_query"
     else:
-        # We have relevant documents, so generate answer
         print("---DECISION: GENERATE---")
         return "generate"
 
-
 def grade_generation_v_documents_and_question(state):
-    """
-    Determines whether the generation is grounded in the document and answers question.
-
-    Args:
-        state (dict): The current graph state
-
-    Returns:
-        str: Decision for next node to call
-    """
-
     print("---CHECK HALLUCINATIONS---")
     question = state["question"]
     documents = state["documents"]
@@ -230,11 +191,8 @@ def grade_generation_v_documents_and_question(state):
     )
     grade = score.binary_score
 
-    # Check hallucination
     if grade == "yes":
         print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
-        # Check question-answering
-        print("---GRADE GENERATION vs QUESTION---")
         score = answer_grader.invoke({"question": question, "generation": generation})
         grade = score.binary_score
         if grade == "yes":
@@ -245,4 +203,16 @@ def grade_generation_v_documents_and_question(state):
         return "not useful"
     else:
         print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
-        return "not supported"
+        return "not supported"
+
+def greeting(state):
+    print("---GREETING---")
+    return {
+        "generation": "Hello! I'm LegalAlly, an AI assistant specializing in Indian law, particularly the Indian Penal Code and Indian Constitution. How can I assist you today?"
+    }
+
+def off_topic(state):
+    print("---OFF-TOPIC---")
+    return {
+        "generation": "I apologize, but I specialize in matters related to the Indian Penal Code. Could you please ask a question about Indian law or legal matters?"
+    }
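One caveat in intent_aware_response above: str.strip("intent='") removes any of those characters from both ends of the string rather than the literal prefix, so unusual values can be mangled. A standalone sketch of a stricter normalizer (the helper name is hypothetical; nothing here is part of the commit):

import re

def normalize_intent(raw) -> str:
    # Structured case: an object with an .intent field (e.g. IntentClassifier output)
    if hasattr(raw, "intent"):
        return raw.intent.lower()
    # String case, e.g. "intent='greeting'": extract the quoted value explicitly
    if isinstance(raw, str):
        match = re.search(r"intent='([^']+)'", raw)
        return (match.group(1) if match else raw).lower()
    return "unknown"

print(normalize_intent("intent='greeting'"))  # -> greeting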
src/index.py CHANGED
@@ -6,6 +6,9 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.vectorstores import Chroma
 from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
+import time
+from tenacity import retry, stop_after_attempt, wait_exponential
+from tqdm import tqdm  # Add this import for progress bar
 
 # Load environment variables
 load_dotenv()
@@ -53,7 +56,7 @@ def vector_store_exists(persist_directory):
 # Load and process documents
 try:
     print("Loading PDF document...")
-    docs = PyPDFLoader("assets/data/Mandel-IntroEconTheory.pdf").load()
+    docs = PyPDFLoader("assets/data/IPC_and_Constitution.pdf").load()
     print("PDF loaded successfully.")
 
     print("Splitting documents...")
@@ -66,17 +69,29 @@ except Exception as e:
     print(f"Error processing documents: {e}")
     sys.exit(1)
 
+@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10))
+def create_vector_store_batch(persist_directory, documents, embedding, batch_size=50):
+    vectorstore = None
+    for i in tqdm(range(0, len(documents), batch_size), desc="Processing batches"):
+        batch = documents[i:i+batch_size]
+        if vectorstore is None:
+            vectorstore = Chroma.from_documents(
+                documents=batch,
+                collection_name="rag-chroma",
+                embedding=embedding,
+                persist_directory=persist_directory
+            )
+        else:
+            vectorstore.add_documents(batch)
+        time.sleep(1)  # Add a small delay between batches
+    return vectorstore
+
 # Create or load vector store
 try:
     persist_directory = './vectordb'
     if not vector_store_exists(persist_directory):
         print("Creating new vector store...")
-        vectorstore = Chroma.from_documents(
-            documents=doc_splits,
-            collection_name="rag-chroma",
-            embedding=embd,
-            persist_directory=persist_directory
-        )
+        vectorstore = create_vector_store_batch(persist_directory, doc_splits, embd)
         print("New vector store created and populated.")
     else:
         print("Loading existing vector store...")
@@ -87,7 +102,7 @@ try:
     )
     print("Existing vector store loaded.")
 
-    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
+    retriever = vectorstore.as_retriever()
     print("Retriever set up successfully.")
 except Exception as e:
     print(f"Error with vector store operations: {e}")
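Note that @retry here wraps the whole batching loop, so a failure in a late batch re-runs the loop from the start and can re-insert earlier batches into Chroma. A hedged alternative that retries only the failing batch; add_batch is a hypothetical helper, not part of this commit:

from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=10))
def add_batch(vectorstore, batch):
    # Only this call is retried on transient embedding/API errors
    vectorstore.add_documents(batch)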
src/llm.py CHANGED
@@ -24,11 +24,11 @@ class RouteQuery(BaseModel):
 # llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)
 structured_llm_router = llm.with_structured_output(RouteQuery)
 
-#%%
 # Prompt
 system = """You are an expert at routing a user question to a vectorstore or web search.
-The vectorstore contains documents related to basic marxist political economy. The contains documents from the book Introduction to Marxist Political Economy by Ernest Mandel.
-Use the vectorstore for questions on these topics. Otherwise, use web-search."""
+The vectorstore contains documents related to Indian Penal Code and The Indian Constitution.
+It can answer most questions related to IPC and the Constitution.
+Use web-search if the answer is not in the vectorstore."""
 route_prompt = ChatPromptTemplate.from_messages(
     [
         ("system", system),
@@ -36,7 +36,6 @@ route_prompt = ChatPromptTemplate.from_messages(
     ]
 )
 
-#%%
 question_router = route_prompt | structured_llm_router
 
 # %%
@@ -50,8 +49,6 @@ class GradeDocuments(BaseModel):
         description="Documents are relevant to the question, 'yes' or 'no'"
     )
 
-
-#%%
 # LLM with function call
 # llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)
 structured_llm_grader = llm.with_structured_output(GradeDocuments)
@@ -69,10 +66,12 @@ grade_prompt = ChatPromptTemplate.from_messages(
 )
 
 retrieval_grader = grade_prompt | structured_llm_grader
-question = "agent memory"
-docs = retriever.invoke(question)
-doc_txt = docs[1].page_content
-print(retrieval_grader.invoke({"question": question, "document": doc_txt}))
+
+
+# question = "agent memory"
+# docs = retriever.invoke(question)
+# doc_txt = docs[1].page_content
+# print(retrieval_grader.invoke({"question": question, "document": doc_txt}))
 
 #%%
 
@@ -95,9 +94,9 @@ def format_docs(docs):
 # Chain
 rag_chain = prompt | llm | StrOutputParser()
 
-# Run
-generation = rag_chain.invoke({"context": docs, "question": question})
-print(generation)
+# # Run
+# generation = rag_chain.invoke({"context": docs, "question": question})
+# print(generation)
 
 #%%
 
@@ -128,7 +127,7 @@ hallucination_prompt = ChatPromptTemplate.from_messages(
 )
 
 hallucination_grader = hallucination_prompt | structured_llm_grader
-hallucination_grader.invoke({"documents": docs, "generation": generation})
+# hallucination_grader.invoke({"documents": docs, "generation": generation})
 
 #%%
 ### Answer Grader
@@ -158,7 +157,7 @@ answer_prompt = ChatPromptTemplate.from_messages(
 )
 
 answer_grader = answer_prompt | structured_llm_grader
-answer_grader.invoke({"question": question, "generation": generation})
+# answer_grader.invoke({"question": question, "generation": generation})
 
 #%%
 ### Question Re-writer
@@ -180,4 +179,29 @@ re_write_prompt = ChatPromptTemplate.from_messages(
 )
 
 question_rewriter = re_write_prompt | llm | StrOutputParser()
-question_rewriter.invoke({"question": question})
+# question_rewriter.invoke({"question": question})
+
+class IntentClassifier(BaseModel):
+    """Classify the intent of the user query."""
+
+    intent: Literal["greeting", "legal_query", "follow_up", "off_topic"] = Field(
+        ...,
+        description="Classify the intent of the user query. 'greeting' if the user is saying greetings, 'legal_query' if the user is asking for information about law, 'follow_up' if the user is asking for information related to the previous conversation, 'off_topic' if the user is asking for information about anything else.",
+    )
+
+# LLM with function call
+# llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)
+structured_llm_intent_classifier = llm.with_structured_output(IntentClassifier)
+
+# Prompt
+system = """You are an intent classifier that classifies the intent of a user query. \n
+Give the intent as one of the following: 'greeting', 'legal_query', 'follow_up', 'off_topic'."""
+intent_classifier_prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", system),
+        ("human", "Here is the user query: \n\n {question} \n\n Classify the intent of the user query."),
+    ]
+)
+
+intent_classifier = intent_classifier_prompt | structured_llm_intent_classifier
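A hedged usage sketch of the new intent classifier (the sample queries are illustrative, and running it needs the Azure OpenAI credentials src/llm.py loads):

# intent_classifier yields an IntentClassifier instance, so .intent is one of
# 'greeting', 'legal_query', 'follow_up', or 'off_topic'
for q in ["hello there", "What is Section 498A of the IPC?", "best pizza in Mumbai?"]:
    result = intent_classifier.invoke({"question": q})
    print(q, "->", result.intent)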
vectordb/{08d73b15-e800-45c5-a450-5b9d696166f3 → 65ba2328-ffa1-497d-b641-c6b84db7f0e1}/data_level0.bin RENAMED
File without changes
vectordb/{08d73b15-e800-45c5-a450-5b9d696166f3 → 65ba2328-ffa1-497d-b641-c6b84db7f0e1}/header.bin RENAMED
File without changes
vectordb/{08d73b15-e800-45c5-a450-5b9d696166f3 → 65ba2328-ffa1-497d-b641-c6b84db7f0e1}/length.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c46bdbd91f9ce31abb4f696e2ccd22cf2238401c3164a6636e356a2b808009fc
+oid sha256:f67fade90d336844894516fb804b85fd8b744c00595381c3203e9fd8f5db576b
 size 4000
vectordb/{08d73b15-e800-45c5-a450-5b9d696166f3 → 65ba2328-ffa1-497d-b641-c6b84db7f0e1}/link_lists.bin RENAMED
File without changes
vectordb/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fea71fced5d6442c3fac0c49649f077f5ecf131a8f1ac0df38bba939fd3f8f8
-size 2387968
+oid sha256:aab4817b7e371b5ddea619c2acb598da4c22f8b8a47e32fd84528a50018b8668
+size 13512704