Spaces:

veerukhannan
/

advisor

Sleeping

App Files Files Community

veerukhannan committed on Nov 23, 2024

Commit

eca979c

verified ·

1 Parent(s): 14bc0aa

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -44

app.py CHANGED Viewed

@@ -5,12 +5,21 @@ from openai import OpenAI
 import json
 from typing import List, Dict
 import re
 class LegalAssistant:
     def __init__(self):
         # Initialize ChromaDB
         self.chroma_client = chromadb.Client()
-        self.collection = self.chroma_client.get_or_create_collection("legal_documents")
         # Initialize Mistral AI client
         self.mistral_client = OpenAI(
@@ -19,40 +28,104 @@ class LegalAssistant:
         )
         # Define system prompt with strict rules
-        self.system_prompt = """You are a specialized legal assistant trained on Indian law. You MUST follow these strict rules:
 RESPONSE FORMAT RULES:
 1. ALWAYS structure your response in this exact JSON format:
    {
-     "answer": "Your detailed answer here",
-     "reference_sections": ["Section X of Act Y", ...],
-     "summary": "2-3 line summary",
-     "confidence": "HIGH/MEDIUM/LOW"
    }
-CONTENT RULES:
-1. NEVER make assumptions or provide information not supported by Indian law
-2. ALWAYS cite specific sections, acts, and legal precedents
-3. If information is insufficient, explicitly state "Insufficient information" in answer
-4. NEVER provide legal advice, only legal information
-5. For any constitutional matters, ALWAYS cite relevant Articles
-ACCURACY RULES:
-1. If confidence is less than 80%, mark as LOW confidence
-2. If multiple interpretations exist, list ALL with citations
-3. If law has been amended, specify the latest amendment date
-4. For case law, cite the full case reference
-PROHIBITED:
-1. NO personal opinions
-2. NO hypothetical scenarios
-3. NO interpretation of ongoing cases
-4. NO advice on specific legal situations
 ERROR HANDLING:
-1. If query is unclear: Request clarification
-2. If outside Indian law scope: State "Outside scope of Indian law"
-3. If conflicting laws exist: List all applicable laws"""
     def validate_query(self, query: str) -> tuple[bool, str]:
         """Validate the input query"""
@@ -69,17 +142,27 @@ ERROR HANDLING:
         try:
             results = self.collection.query(
                 query_texts=[query],
-                n_results=3
             )
             if results and results['documents']:
                 documents = results['documents'][0]
-                metadata = results.get('metadatas', [[]])[0]
-                sources = [m.get('source', 'Unknown') for m in metadata]
-                return "\n\n".join(documents), sources
             return "", []
         except Exception as e:
-            print(f"Search error: {str(e)}")
             return "", []
     def get_response(self, query: str) -> Dict:
@@ -98,10 +181,26 @@ ERROR HANDLING:
             # Get relevant context from ChromaDB
             context, sources = self._search_documents(query)
-            # Prepare content
-            content = f"""Context: {context}
-Sources: {', '.join(sources)}
-Question: {query}""" if context else query
             # Get response from Mistral AI
             response = self.mistral_client.chat.completions.create(
@@ -124,13 +223,29 @@ Question: {query}""" if context else query
             if response.choices and len(response.choices) > 0:
                 try:
                     result = json.loads(response.choices[0].message.content)
                     return {
                         "answer": result.get("answer", "No answer provided"),
-                        "references": result.get("reference_sections", []),
                         "summary": result.get("summary", ""),
                         "confidence": result.get("confidence", "LOW")
                     }
                 except json.JSONDecodeError:
                     return {
                         "answer": "Error: Response format invalid",
                         "references": [],
@@ -139,13 +254,14 @@ Question: {query}""" if context else query
                     }
             return {
-                "answer": "No response received",
                 "references": [],
                 "summary": "Response generation failed",
                 "confidence": "LOW"
             }
         except Exception as e:
             return {
                 "answer": f"Error: {str(e)}",
                 "references": [],
@@ -158,6 +274,7 @@ assistant = LegalAssistant()
 # Create Gradio interface
 def process_query(query: str) -> tuple:
     response = assistant.get_response(query)
     return (
         response["answer"],
@@ -180,7 +297,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         query_input = gr.Textbox(
             label="Enter your legal query",
-            placeholder="e.g., What is the legal age for marriage in India as per current laws?"
         )
     with gr.Row():
@@ -194,15 +311,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column():
-            references_output = gr.Textbox(label="Legal References", lines=3)
         with gr.Column():
             summary_output = gr.Textbox(label="Summary", lines=2)
     gr.Markdown("""
     ### Important Notes:
-    - This assistant provides legal information, not legal advice
-    - Always verify information with a qualified legal professional
-    - Information is based on Indian law only
     """)
     submit_btn.click(
@@ -212,4 +329,5 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     )
 # Launch the app
-demo.launch()

 import json
 from typing import List, Dict
 import re
+from sentence_transformers import SentenceTransformer
+from loguru import logger
 class LegalAssistant:
     def __init__(self):
         # Initialize ChromaDB
         self.chroma_client = chromadb.Client()
+        self.collection = self.chroma_client.get_or_create_collection(
+            name="legal_documents",
+            embedding_function=SentenceTransformer('all-MiniLM-L6-v2')
+        )
+        # Load documents if collection is empty
+        if self.collection.count() == 0:
+            self._load_documents()
         # Initialize Mistral AI client
         self.mistral_client = OpenAI(
         )
         # Define system prompt with strict rules
+        self.system_prompt = """You are a specialized legal assistant that MUST follow these STRICT rules:
+CRITICAL RULE:
+YOU MUST ONLY USE INFORMATION FROM THE PROVIDED CONTEXT. DO NOT USE ANY EXTERNAL KNOWLEDGE, INCLUDING KNOWLEDGE ABOUT IPC, CONSTITUTION, OR ANY OTHER LEGAL DOCUMENTS.
 RESPONSE FORMAT RULES:
 1. ALWAYS structure your response in this exact JSON format:
    {
+     "answer": "Your detailed answer here using ONLY information from the provided context",
+     "reference_sections": ["Exact section titles from the context"],
+     "summary": "2-3 line summary using ONLY information from context",
+     "confidence": "HIGH/MEDIUM/LOW based on context match"
+   }
+STRICT CONTENT RULES:
+1. NEVER mention or reference IPC, Constitution, or any laws not present in the context
+2. If the information is not in the context, respond ONLY with:
+   {
+     "answer": "This information is not present in the provided document.",
+     "reference_sections": [],
+     "summary": "Information not found in document",
+     "confidence": "LOW"
    }
+3. ONLY cite sections that are explicitly present in the provided context
+4. DO NOT make assumptions or inferences beyond the context
+5. DO NOT combine information from external knowledge
+CONTEXT USAGE RULES:
+1. HIGH confidence: Only when exact information is found in context
+2. MEDIUM confidence: When partial information is found
+3. LOW confidence: When information is unclear or not found
+4. If multiple sections are relevant, cite ALL relevant sections from context
+PROHIBITED ACTIONS:
+1. NO references to IPC sections
+2. NO references to Constitutional articles
+3. NO mentions of case law not in context
+4. NO legal interpretations beyond context
+5. NO combining document information with external knowledge
 ERROR HANDLING:
+1. If query is about laws not in context: State "This topic is not covered in the provided document"
+2. If query is unclear: Request specific clarification about which part of the document to check
+3. If context is insufficient: State "The document does not contain this information"
+"""
+    def _load_documents(self):
+        """Load a2023-45.txt, split it into sections using index.txt, and index them.
+
+        A document line starts a new section when it contains any non-blank
+        line of index.txt as a substring. The text between two such heading
+        lines becomes one ChromaDB document, stored with its heading line as
+        the ``title`` metadata and a 1-based ``section_number``. Text that
+        precedes the first heading is stored under an empty title.
+
+        Raises:
+            Exception: any error raised while reading the files or adding to
+                the collection is logged and then re-raised.
+        """
+        try:
+            # Read the main document
+            with open('a2023-45.txt', 'r', encoding='utf-8') as f:
+                document = f.read()
+            # Read the index, dropping blank lines: an empty heading is a
+            # substring of every line, which would turn every document line
+            # into a heading and leave no section content to index.
+            with open('index.txt', 'r', encoding='utf-8') as f:
+                headings = [entry.strip() for entry in f if entry.strip()]
+            # Parse index and split document
+            sections = []
+            current_title = ""
+            current_lines = []
+            for line in document.split('\n'):
+                if any(heading in line for heading in headings):
+                    # A heading closes the section collected so far; bodies
+                    # that are only whitespace are skipped so no empty
+                    # documents are indexed.
+                    content = "\n".join(current_lines).strip()
+                    if content:
+                        sections.append({
+                            "title": current_title,
+                            "content": content
+                        })
+                    current_title = line.strip()
+                    current_lines = []
+                else:
+                    current_lines.append(line)
+            # Add the last section
+            content = "\n".join(current_lines).strip()
+            if content:
+                sections.append({
+                    "title": current_title,
+                    "content": content
+                })
+            # Add to ChromaDB
+            for i, section in enumerate(sections):
+                self.collection.add(
+                    documents=[section["content"]],
+                    metadatas=[{
+                        "title": section["title"],
+                        "source": "a2023-45.txt",
+                        "section_number": i + 1
+                    }],
+                    ids=[f"section_{i+1}"]
+                )
+            logger.info(f"Loaded {len(sections)} sections into ChromaDB")
+        except Exception as e:
+            logger.error(f"Error loading documents: {str(e)}")
+            raise
     def validate_query(self, query: str) -> tuple[bool, str]:
         """Validate the input query"""
         try:
             results = self.collection.query(
                 query_texts=[query],
+                n_results=3,
+                include=["metadatas", "documents"]
             )
             if results and results['documents']:
                 documents = results['documents'][0]
+                metadata = results['metadatas'][0]
+                # Format the context with section titles
+                formatted_docs = []
+                references = []
+                for doc, meta in zip(documents, metadata):
+                    formatted_docs.append(f"{meta['title']}:\n{doc}")
+                    references.append(f"{meta['title']} (Section {meta['section_number']})")
+                return "\n\n".join(formatted_docs), references
             return "", []
         except Exception as e:
+            logger.error(f"Search error: {str(e)}")
             return "", []
     def get_response(self, query: str) -> Dict:
             # Get relevant context from ChromaDB
             context, sources = self._search_documents(query)
+            if not context:
+                return {
+                    "answer": "This information is not present in the provided document.",
+                    "references": [],
+                    "summary": "Information not found in document",
+                    "confidence": "LOW"
+                }
+            # Prepare content with explicit instructions
+            content = f"""IMPORTANT: ONLY use information from the following context to answer the question. DO NOT use any external knowledge.
+Context Sections:
+{context}
+Available Document Sections:
+{', '.join(sources)}
+Question: {query}
+Remember: ONLY use information from the above context. If the information is not in the context, state that it's not in the document."""
             # Get response from Mistral AI
             response = self.mistral_client.chat.completions.create(
             if response.choices and len(response.choices) > 0:
                 try:
                     result = json.loads(response.choices[0].message.content)
+                    # Validate that references only contain sections from sources
+                    valid_references = [ref for ref in result.get("reference_sections", [])
+                                     if any(source in ref for source in sources)]
+                    # If references mention unauthorized sources, return error
+                    if len(valid_references) != len(result.get("reference_sections", [])):
+                        logger.warning("Response contained unauthorized references")
+                        return {
+                            "answer": "Error: Response contained unauthorized references. Only information from the provided document is allowed.",
+                            "references": [],
+                            "summary": "Invalid response generated",
+                            "confidence": "LOW"
+                        }
                     return {
                         "answer": result.get("answer", "No answer provided"),
+                        "references": valid_references,
                         "summary": result.get("summary", ""),
                         "confidence": result.get("confidence", "LOW")
                     }
                 except json.JSONDecodeError:
+                    logger.error("Failed to parse response JSON")
                     return {
                         "answer": "Error: Response format invalid",
                         "references": [],
                     }
             return {
+                "answer": "No valid response received",
                 "references": [],
                 "summary": "Response generation failed",
                 "confidence": "LOW"
             }
         except Exception as e:
+            logger.error(f"Error in get_response: {str(e)}")
             return {
                 "answer": f"Error: {str(e)}",
                 "references": [],
 # Create Gradio interface
 def process_query(query: str) -> tuple:
+    """Process the query and return formatted response"""
     response = assistant.get_response(query)
     return (
         response["answer"],
     with gr.Row():
         query_input = gr.Textbox(
             label="Enter your legal query",
+            placeholder="e.g., What are the main provisions in this document?"
         )
     with gr.Row():
     with gr.Row():
         with gr.Column():
+            references_output = gr.Textbox(label="Document References", lines=3)
         with gr.Column():
             summary_output = gr.Textbox(label="Summary", lines=2)
     gr.Markdown("""
     ### Important Notes:
+    - Responses are based ONLY on the provided document
+    - No external legal knowledge is used
+    - All references are from the document itself
     """)
     submit_btn.click(
     )
 # Launch the app
+if __name__ == "__main__":
+    demo.launch()