Spaces:

veerukhannan
/

advisor

Sleeping

App Files Files Community

veerukhannan committed on Nov 23, 2024

Commit

e3f3bb1

verified ·

1 Parent(s): c9602aa

Update app.py

Browse files

Files changed (1) hide show

app.py +185 -164

app.py CHANGED Viewed

@@ -1,194 +1,215 @@
 import gradio as gr
-from typing import List, Dict
-from langchain_huggingface import HuggingFacePipeline  # Fixed import
-from langchain_core.prompts import ChatPromptTemplate
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 import chromadb
-from chromadb.utils import embedding_functions
-import torch
 import os
-class LegalChatbot:
     def __init__(self):
-        print("Initializing Legal Chatbot...")
         # Initialize ChromaDB
         self.chroma_client = chromadb.Client()
-        # Initialize embedding function
-        self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
-            model_name="all-MiniLM-L6-v2",
-            device="cpu"
         )
-        # Create collection
-        self.collection = self.chroma_client.create_collection(
-            name="text_collection",
-            embedding_function=self.embedding_function,
-            metadata={"hnsw:space": "cosine"}
-        )
-        # Initialize the model - using a smaller model suitable for CPU
-        pipe = pipeline(
-            "text-generation",
-            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-            max_new_tokens=512,
-            temperature=0.7,
-            top_p=0.95,
-            repetition_penalty=1.15,
-            device="cpu"
-        )
-        self.llm = HuggingFacePipeline(pipeline=pipe)
-        # Create prompt template
-        self.template = """
-        IMPORTANT: You are a helpful assistant that provides information about the Bharatiya Nyaya Sanhita, 2023 based on the retrieved context.
-        STRICT RULES:
-        1. Base your response ONLY on the provided context
-        2. If you cannot find relevant information, respond with: "I apologize, but I cannot find information about that in the database."
-        3. Do not make assumptions or use external knowledge
-        4. Be concise and accurate in your responses
-        5. If quoting from the context, clearly indicate it
-        Context: {context}
-        Chat History: {chat_history}
-        Question: {question}
-        Answer:"""
-        self.prompt = ChatPromptTemplate.from_template(self.template)
-        self.chat_history = ""
-        self.initialized = False
-    def _initialize_database(self) -> bool:
-        """Initialize the database with document content"""
-        try:
-            if self.initialized:
-                return True
-            print("Loading documents into database...")
-            # Read the main text file
-            with open('a2023-45.txt', 'r', encoding='utf-8') as f:
-                text_content = f.read()
-            # Read the index file
-            with open('index.txt', 'r', encoding='utf-8') as f:
-                index_lines = f.readlines()
-            # Create chunks
-            chunk_size = 512
-            chunks = []
-            for i in range(0, len(text_content), chunk_size):
-                chunk = text_content[i:i + chunk_size]
-                chunks.append(chunk)
-            # Add documents in batches
-            batch_size = 50
-            for i in range(0, len(chunks), batch_size):
-                batch = chunks[i:i + batch_size]
-                batch_ids = [f"doc_{j}" for j in range(i, i + len(batch))]
-                batch_metadata = [{
-                    "index": index_lines[j].strip() if j < len(index_lines) else f"Chunk {j+1}",
-                    "chunk_number": j
-                } for j in range(i, i + len(batch))]
-                self.collection.add(
-                    documents=batch,
-                    ids=batch_ids,
-                    metadatas=batch_metadata
-                )
-            self.initialized = True
-            return True
-        except Exception as e:
-            print(f"Error initializing database: {str(e)}")
-            return False
-    def _search_database(self, query: str) -> List[Dict]:
-        """Search the database for relevant documents"""
         try:
             results = self.collection.query(
                 query_texts=[query],
-                n_results=3,
-                include=["documents", "metadatas", "distances"]
             )
-            return [
-                {
-                    "content": doc,
-                    "metadata": meta,
-                    "score": 1 - dist
-                }
-                for doc, meta, dist in zip(
-                    results['documents'][0],
-                    results['metadatas'][0],
-                    results['distances'][0]
-                )
-            ]
         except Exception as e:
-            print(f"Error searching database: {str(e)}")
-            return []
-    def chat(self, query: str, history) -> str:
-        """Process a query and return a response"""
         try:
-            # Initialize database if needed
-            if not self.initialized and not self._initialize_database():
-                return "Error: Unable to initialize the database. Please try again."
-            # Search for relevant content
-            search_results = self._search_database(query)
-            if not search_results:
-                return "I apologize, but I cannot find information about that in the database."
-            # Extract and combine relevant content
-            context = "\n\n".join([
-                f"[Section {r['metadata']['index']}]\n{r['content']}"
-                for r in search_results
-            ])
-            # Generate response using LLM
-            chain = self.prompt | self.llm
-            result = chain.invoke({
-                "context": context,
-                "chat_history": self.chat_history,
-                "question": query
-            })
-            # Update chat history
-            self.chat_history += f"\nUser: {query}\nAI: {result}\n"
-            return result
         except Exception as e:
-            return f"Error processing query: {str(e)}"
-# Initialize the chatbot
-chatbot = LegalChatbot()
-# Create the Gradio interface
-iface = gr.ChatInterface(
-    chatbot.chat,
-    title="Bharatiya Nyaya Sanhita, 2023 - Legal Assistant",
-    description="Ask questions about the Bharatiya Nyaya Sanhita, 2023. The system will initialize on your first query.",
-    examples=[
-        "What is criminal conspiracy?",
-        "What are the punishments for corruption?",
-        "Explain the concept of culpable homicide",
-        "What constitutes theft under the act?"
-    ],
-    theme=gr.themes.Soft()
-)
-# Launch the interface
-if __name__ == "__main__":
-    iface.launch(
-        share=False,
-        show_error=True
-    )

 import gradio as gr
 import chromadb
 import os
+from openai import OpenAI
+import json
+from typing import List, Dict
+import re
+class LegalAssistant:
     def __init__(self):
         # Initialize ChromaDB
         self.chroma_client = chromadb.Client()
+        self.collection = self.chroma_client.get_or_create_collection("legal_documents")
+        # Initialize Mistral AI client
+        self.mistral_client = OpenAI(
+            api_key=os.environ.get("MISTRAL_API_KEY", "dfb2j1YDsa298GXTgZo3juSjZLGUCfwi"),
+            base_url="https://api.mistral.ai/v1"
         )
+        # Define system prompt with strict rules
+        self.system_prompt = """You are a specialized legal assistant trained on Indian law. You MUST follow these strict rules:
+RESPONSE FORMAT RULES:
+1. ALWAYS structure your response in this exact JSON format:
+   {
+     "answer": "Your detailed answer here",
+     "reference_sections": ["Section X of Act Y", ...],
+     "summary": "2-3 line summary",
+     "confidence": "HIGH/MEDIUM/LOW"
+   }
+CONTENT RULES:
+1. NEVER make assumptions or provide information not supported by Indian law
+2. ALWAYS cite specific sections, acts, and legal precedents
+3. If information is insufficient, explicitly state "Insufficient information" in answer
+4. NEVER provide legal advice, only legal information
+5. For any constitutional matters, ALWAYS cite relevant Articles
+ACCURACY RULES:
+1. If confidence is less than 80%, mark as LOW confidence
+2. If multiple interpretations exist, list ALL with citations
+3. If law has been amended, specify the latest amendment date
+4. For case law, cite the full case reference
+PROHIBITED:
+1. NO personal opinions
+2. NO hypothetical scenarios
+3. NO interpretation of ongoing cases
+4. NO advice on specific legal situations
+ERROR HANDLING:
+1. If query is unclear: Request clarification
+2. If outside Indian law scope: State "Outside scope of Indian law"
+3. If conflicting laws exist: List all applicable laws"""
+    def validate_query(self, query: str) -> tuple[bool, str]:
+        """Validate the input query"""
+        if not query or len(query.strip()) < 10:
+            return False, "Query too short. Please provide more details."
+        if len(query) > 500:
+            return False, "Query too long. Please be more concise."
+        if not re.search(r'[?.]$', query):
+            return False, "Query must end with a question mark or period."
+        return True, ""
+    def _search_documents(self, query: str) -> tuple[str, List[str]]:
+        """Search ChromaDB for relevant documents"""
         try:
             results = self.collection.query(
                 query_texts=[query],
+                n_results=3
             )
+            if results and results['documents']:
+                documents = results['documents'][0]
+                metadata = results.get('metadatas', [[]])[0]
+                sources = [m.get('source', 'Unknown') for m in metadata]
+                return "\n\n".join(documents), sources
+            return "", []
         except Exception as e:
+            print(f"Search error: {str(e)}")
+            return "", []
+    def get_response(self, query: str) -> Dict:
+        """Get response from Mistral AI with context from ChromaDB"""
+        # Validate query
+        is_valid, error_message = self.validate_query(query)
+        if not is_valid:
+            return {
+                "answer": error_message,
+                "references": [],
+                "summary": "Invalid query",
+                "confidence": "LOW"
+            }
         try:
+            # Get relevant context from ChromaDB
+            context, sources = self._search_documents(query)
+            # Prepare content
+            content = f"""Context: {context}
+Sources: {', '.join(sources)}
+Question: {query}""" if context else query
+            # Get response from Mistral AI
+            response = self.mistral_client.chat.completions.create(
+                model="mistral-medium",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": self.system_prompt
+                    },
+                    {
+                        "role": "user",
+                        "content": content
+                    }
+                ],
+                temperature=0.1,
+                max_tokens=1000
+            )
+            # Parse response
+            if response.choices and len(response.choices) > 0:
+                try:
+                    result = json.loads(response.choices[0].message.content)
+                    return {
+                        "answer": result.get("answer", "No answer provided"),
+                        "references": result.get("reference_sections", []),
+                        "summary": result.get("summary", ""),
+                        "confidence": result.get("confidence", "LOW")
+                    }
+                except json.JSONDecodeError:
+                    return {
+                        "answer": "Error: Response format invalid",
+                        "references": [],
+                        "summary": "Response parsing failed",
+                        "confidence": "LOW"
+                    }
+            return {
+                "answer": "No response received",
+                "references": [],
+                "summary": "Response generation failed",
+                "confidence": "LOW"
+            }
         except Exception as e:
+            return {
+                "answer": f"Error: {str(e)}",
+                "references": [],
+                "summary": "System error occurred",
+                "confidence": "LOW"
+            }
+# Initialize the assistant
+assistant = LegalAssistant()
+# Create Gradio interface
+def process_query(query: str) -> tuple:
+    response = assistant.get_response(query)
+    return (
+        response["answer"],
+        ", ".join(response["references"]) if response["references"] else "No specific references",
+        response["summary"] if response["summary"] else "No summary available",
+        response["confidence"]
+    )
+# Create the Gradio interface with a professional theme
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # Indian Legal Assistant
+    ## Guidelines for Queries:
+    1. Be specific and clear in your questions
+    2. End questions with a question mark
+    3. Provide relevant context if available
+    4. Keep queries between 10-500 characters
+    """)
+    with gr.Row():
+        query_input = gr.Textbox(
+            label="Enter your legal query",
+            placeholder="e.g., What is the legal age for marriage in India as per current laws?"
+        )
+    with gr.Row():
+        submit_btn = gr.Button("Submit", variant="primary")
+    with gr.Row():
+        confidence_output = gr.Textbox(label="Confidence Level")
+    with gr.Row():
+        answer_output = gr.Textbox(label="Answer", lines=5)
+    with gr.Row():
+        with gr.Column():
+            references_output = gr.Textbox(label="Legal References", lines=3)
+        with gr.Column():
+            summary_output = gr.Textbox(label="Summary", lines=2)
+    gr.Markdown("""
+    ### Important Notes:
+    - This assistant provides legal information, not legal advice
+    - Always verify information with a qualified legal professional
+    - Information is based on Indian law only
+    """)
+    submit_btn.click(
+        fn=process_query,
+        inputs=[query_input],
+        outputs=[answer_output, references_output, summary_output, confidence_output]
+    )
+# Launch the app
+demo.launch()