Spaces:

veerukhannan
/

advisor

Sleeping

App Files Files Community

veerukhannan committed on Nov 23, 2024

Commit

859da87

verified ·

1 Parent(s): 4ed9501

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -50

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ from openai import OpenAI
 import json
 from sentence_transformers import SentenceTransformer
 from loguru import logger
-from test_embeddings import test_chromadb_content
 class SentenceTransformerEmbeddings:
     def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
@@ -20,7 +20,6 @@ class LegalAssistant:
         try:
             # Initialize and verify ChromaDB content
             logger.info("Initializing LegalAssistant...")
-            from test_embeddings import test_chromadb_content, initialize_chromadb
             # Try to verify content, if fails, try to initialize
             if not test_chromadb_content():
@@ -96,43 +95,48 @@ class LegalAssistant:
             for doc, meta in zip(results['documents'][0], results['metadatas'][0]):
                 context_parts.append(f"{meta['title']}:\n{doc}")
-                references.append(f"{meta['title']} (Section {meta['section_number']})")
             context = "\n\n".join(context_parts)
-            # Prepare content for Mistral AI
-            system_prompt = """You are a specialized legal assistant that MUST follow these STRICT rules:
-CRITICAL RULE:
-YOU MUST ONLY USE INFORMATION FROM THE PROVIDED CONTEXT. DO NOT USE ANY EXTERNAL KNOWLEDGE.
-RESPONSE FORMAT RULES:
-1. ALWAYS structure your response in this exact JSON format:
-   {
-     "answer": "Your detailed answer here using ONLY information from the provided context",
-     "reference_sections": ["Exact section titles from the context"],
-     "summary": "2-3 line summary using ONLY information from context",
-     "confidence": "HIGH/MEDIUM/LOW based on context match"
-   }
-STRICT CONTENT RULES:
-1. NEVER mention or reference any laws not present in the context
-2. If the information is not in the context, respond with LOW confidence
-3. ONLY cite sections that are explicitly present in the provided context
-4. DO NOT make assumptions or inferences beyond the context
-5. DO NOT combine information from external knowledge"""
-            content = f"""IMPORTANT: ONLY use information from the following context to answer the question.
-Context Sections:
 {context}
-Available Document Sections:
-{', '.join(references)}
 Question: {query}
-Remember: ONLY use information from the above context."""
             # Get response from Mistral AI
             response = self.mistral_client.chat.completions.create(
@@ -142,7 +146,8 @@ Remember: ONLY use information from the above context."""
                     {"role": "user", "content": content}
                 ],
                 temperature=0.1,
-                max_tokens=1000
             )
             # Parse and validate response
@@ -150,39 +155,56 @@ Remember: ONLY use information from the above context."""
                 try:
                     result = json.loads(response.choices[0].message.content)
-                    # Validate references
-                    valid_references = [ref for ref in result.get("reference_sections", [])
-                                     if any(source.split(" (Section")[0] in ref for source in references)]
-                    if len(valid_references) != len(result.get("reference_sections", [])):
-                        logger.warning("Response contained unauthorized references")
-                        return {
-                            "answer": "Error: Response contained unauthorized references",
-                            "references": [],
-                            "summary": "Invalid response generated",
-                            "confidence": "LOW"
-                        }
                     return {
-                        "answer": result.get("answer", "No answer provided"),
                         "references": valid_references,
-                        "summary": result.get("summary", ""),
-                        "confidence": result.get("confidence", "LOW")
                     }
-                except json.JSONDecodeError:
-                    logger.error("Failed to parse response JSON")
                     return {
-                        "answer": "Error: Invalid response format",
                         "references": [],
-                        "summary": "Response parsing failed",
                         "confidence": "LOW"
                     }
             return {
-                "answer": "No valid response received",
                 "references": [],
-                "summary": "Response generation failed",
                 "confidence": "LOW"
             }
@@ -240,10 +262,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column():
-            references_output = gr.Textbox(label="Document References", lines=3)
         with gr.Column():
             summary_output = gr.Textbox(label="Summary", lines=2)
     submit_btn.click(
         fn=process_query,
         inputs=[query_input],

 import json
 from sentence_transformers import SentenceTransformer
 from loguru import logger
+from test_embeddings import test_chromadb_content, initialize_chromadb
 class SentenceTransformerEmbeddings:
     def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
         try:
             # Initialize and verify ChromaDB content
             logger.info("Initializing LegalAssistant...")
             # Try to verify content, if fails, try to initialize
             if not test_chromadb_content():
             for doc, meta in zip(results['documents'][0], results['metadatas'][0]):
                 context_parts.append(f"{meta['title']}:\n{doc}")
+                references.append(meta['title'])
             context = "\n\n".join(context_parts)
+            # Prepare system prompt with explicit JSON format
+            system_prompt = '''You are a specialized legal assistant that MUST follow these STRICT rules:
+1. You MUST ONLY use information from the provided context.
+2. DO NOT use any external knowledge about laws, IPC, Constitution, or legal matters.
+3. Your response MUST be in this EXACT JSON format:
+{
+    "answer": "Your detailed answer using ONLY information from the context",
+    "reference_sections": ["List of section titles used from context"],
+    "summary": "Brief 2-3 line summary",
+    "confidence": "HIGH/MEDIUM/LOW"
+}
+Confidence Level Rules:
+- HIGH: When exact information is found in context
+- MEDIUM: When partial or indirect information is found
+- LOW: When information is unclear or not found
+If information is not in context, respond with:
+{
+    "answer": "This information is not present in the provided document.",
+    "reference_sections": [],
+    "summary": "Information not found in document",
+    "confidence": "LOW"
+}'''
+            # Prepare user content
+            content = f'''Context Sections:
 {context}
 Question: {query}
+IMPORTANT:
+1. Use ONLY the information from the above context
+2. Format your response as a valid JSON object with the exact structure shown above
+3. Include ONLY section titles that exist in the context
+4. DO NOT add any text outside the JSON structure
+5. Ensure the JSON is properly formatted with double quotes'''
             # Get response from Mistral AI
             response = self.mistral_client.chat.completions.create(
                     {"role": "user", "content": content}
                 ],
                 temperature=0.1,
+                max_tokens=1000,
+                response_format={ "type": "json_object" }
             )
             # Parse and validate response
                 try:
                     result = json.loads(response.choices[0].message.content)
+                    # Validate response structure
+                    required_fields = ["answer", "reference_sections", "summary", "confidence"]
+                    if not all(field in result for field in required_fields):
+                        raise ValueError("Missing required fields in response")
+                    # Validate confidence level
+                    if result["confidence"] not in ["HIGH", "MEDIUM", "LOW"]:
+                        result["confidence"] = "LOW"
+                    # Validate references against context
+                    valid_references = [ref for ref in result["reference_sections"]
+                                     if ref in references]
+                    # If references don't match, adjust confidence
+                    if len(valid_references) != len(result["reference_sections"]):
+                        result["reference_sections"] = valid_references
+                        result["confidence"] = "LOW"
+                    # Ensure answer and summary are strings
+                    result["answer"] = str(result["answer"])
+                    result["summary"] = str(result["summary"])
                     return {
+                        "answer": result["answer"],
                         "references": valid_references,
+                        "summary": result["summary"],
+                        "confidence": result["confidence"]
                     }
+                except json.JSONDecodeError as e:
+                    logger.error(f"JSON parsing error: {str(e)}")
+                    return {
+                        "answer": "Error: Failed to parse response format",
+                        "references": [],
+                        "summary": "Response format error",
+                        "confidence": "LOW"
+                    }
+                except ValueError as e:
+                    logger.error(f"Validation error: {str(e)}")
                     return {
+                        "answer": "Error: Invalid response structure",
                         "references": [],
+                        "summary": "Response validation error",
                         "confidence": "LOW"
                     }
             return {
+                "answer": "Error: No valid response received",
                 "references": [],
+                "summary": "No response generated",
                 "confidence": "LOW"
             }
     with gr.Row():
         with gr.Column():
+            references_output = gr.Textbox(label="Document References", lines=2)
         with gr.Column():
             summary_output = gr.Textbox(label="Summary", lines=2)
+    gr.Markdown("""
+    ### Important Notes:
+    - Responses are based ONLY on the provided document
+    - No external legal knowledge is used
+    - All references are from the document itself
+    - Confidence levels indicate how well the answer matches the document content
+    """)
     submit_btn.click(
         fn=process_query,
         inputs=[query_input],