Spaces:
Sleeping
Sleeping
import os | |
import gradio as gr | |
import chromadb | |
from openai import OpenAI | |
import json | |
from sentence_transformers import SentenceTransformer | |
from loguru import logger | |
from test_embeddings import test_chromadb_content, initialize_chromadb | |
class SentenceTransformerEmbeddings:
    """Callable that embeds texts with a SentenceTransformer model.

    An instance is handed to the ChromaDB collection as its
    ``embedding_function``; calling it encodes a batch of strings and
    returns plain nested lists.
    """

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """Load the sentence-transformers model identified by *model_name*."""
        self.model = SentenceTransformer(model_name)

    # NOTE(review): the parameter name ``input`` shadows a builtin but is
    # part of the call signature; presumably ChromaDB's embedding-function
    # interface expects it -- confirm before renaming.
    def __call__(self, input: list[str]) -> list[list[float]]:
        """Return one embedding (as a list) per string in *input*."""
        return self.model.encode(input).tolist()
class LegalAssistant:
    """Question answering over the Bharateeya Nyaya Sanhita (BNS).

    A query is validated, the three closest sections are fetched from the
    ``legal_documents`` ChromaDB collection, and the Mistral chat API is
    asked to answer strictly from those sections in a fixed JSON shape.
    Every public result is a dict with the keys ``answer``, ``references``
    (list of strings), ``summary`` and ``confidence``.
    """

    # Placeholder used whenever no verified section title can be shown.
    _NO_REFERENCES = "No specific references from Bharateeya Nyaya Sanhita"
    # Openers the UI text is expected to start with.
    _ANSWER_PREFIX = "The Bharateeya Nyaya Sanhita"
    _SUMMARY_PREFIX = "In the Bharateeya Nyaya Sanhita"
    # Keys the model's JSON reply must contain, and the accepted confidence values.
    _REQUIRED_FIELDS = ("answer", "reference_sections", "summary", "confidence")
    _CONFIDENCE_LEVELS = ("HIGH", "MEDIUM", "LOW")
    # Query length bounds (in characters, after stripping whitespace).
    _MIN_QUERY_CHARS = 10
    _MAX_QUERY_CHARS = 500

    # Sent verbatim as the system message; it is never passed through
    # str.format, so the literal braces are safe.
    _SYSTEM_PROMPT = '''You are a specialized legal assistant for the Bharateeya Nyaya Sanhita (BNS) that MUST follow these STRICT rules:
1. You MUST ONLY use information from the provided context.
2. DO NOT use any external knowledge about laws, IPC, Constitution, or legal matters.
3. Your response MUST be in this EXACT JSON format:
{
"answer": "Your detailed answer explaining BNS sections in simple, easy-to-understand language. Start with 'The Bharateeya Nyaya Sanhita...'",
"reference_sections": ["List of relevant BNS section titles"],
"summary": "Provide a user-friendly summary that explains:\n1. What BNS sections were found\n2. What each section covers\n3. How these sections relate to the query\nStart with 'In the Bharateeya Nyaya Sanhita...'",
"confidence": "HIGH/MEDIUM/LOW"
}
Confidence Level Rules:
- HIGH: When exact matching BNS sections and their details are found
- MEDIUM: When partially relevant BNS sections are found
- LOW: When sections are not clearly relevant or not found
Response Guidelines:
1. Always mention "Bharateeya Nyaya Sanhita" when referencing sections
2. Explain legal terms in simple language
3. Make the summary easy to understand for non-legal persons
4. Break down complex legal concepts into simple explanations
5. Use everyday examples where appropriate
If information is not in context, respond with:
{
"answer": "The Bharateeya Nyaya Sanhita sections related to your query are not present in the provided document.",
"reference_sections": [],
"summary": "No relevant sections found in the Bharateeya Nyaya Sanhita document",
"confidence": "LOW"
}'''

    # User message template; only {context} and {query} are substituted.
    _USER_PROMPT_TEMPLATE = '''Context Sections from Bharateeya Nyaya Sanhita:
{context}
Question: {query}
IMPORTANT:
1. Use ONLY the information from the above BNS context
2. Format your response as a valid JSON object
3. Always reference "Bharateeya Nyaya Sanhita" in your response
4. Explain each section in simple, user-friendly language
5. Make the summary comprehensive but easy to understand
6. Break down legal concepts for non-legal persons
7. Ensure proper JSON formatting with double quotes'''

    def __init__(self):
        """Connect to the ChromaDB collection and create the Mistral client.

        Raises:
            ValueError: if ``MISTRAL_API_KEY`` is not set, or if the ChromaDB
                content can neither be verified nor initialized.
            Exception: anything raised while opening the collection or
                building the clients is logged and re-raised unchanged.
        """
        try:
            logger.info("Initializing Bharateeya Nyaya Sanhita Assistant...")

            # Credentials must come from the environment; a key literal in
            # source would be published with the code. Checked first so a
            # misconfigured deployment fails before any heavy loading.
            api_key = os.environ.get("MISTRAL_API_KEY")
            if not api_key:
                raise ValueError("MISTRAL_API_KEY environment variable is not set")

            # Verify the stored content; fall back to (re)initializing it.
            if not test_chromadb_content():
                logger.warning("ChromaDB verification failed, attempting to initialize...")
                if not initialize_chromadb():
                    raise ValueError("Failed to initialize ChromaDB with BNS content")

            # The database lives in ``chroma_db`` next to this file.
            base_path = os.path.dirname(os.path.abspath(__file__))
            chroma_path = os.path.join(base_path, 'chroma_db')
            self.chroma_client = chromadb.PersistentClient(path=chroma_path)
            self.embedding_function = SentenceTransformerEmbeddings()

            # Open the existing collection (it is not created here).
            self.collection = self.chroma_client.get_collection(
                name="legal_documents",
                embedding_function=self.embedding_function
            )
            logger.info(f"BNS Collection loaded with {self.collection.count()} sections")

            # Mistral is reached through the OpenAI client pointed at
            # Mistral's base URL.
            self.mistral_client = OpenAI(
                api_key=api_key,
                base_url="https://api.mistral.ai/v1"
            )
            logger.info("BNS Assistant initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing BNS Assistant: {str(e)}")
            raise

    def validate_query(self, query: str) -> tuple[bool, str]:
        """Check that *query* is between 10 and 500 characters long.

        Both bounds are measured on the whitespace-stripped text so that
        padding neither rescues a too-short query nor rejects a valid one.

        Returns:
            ``(True, "")`` when acceptable, otherwise ``(False, message)``
            with a user-facing explanation.
        """
        stripped = query.strip() if query else ""
        if len(stripped) < self._MIN_QUERY_CHARS:
            return False, "Please provide a more detailed question about the Bharateeya Nyaya Sanhita (minimum 10 characters)."
        if len(stripped) > self._MAX_QUERY_CHARS:
            return False, "Please make your question more concise (maximum 500 characters)."
        return True, ""

    def get_response(self, query: str) -> dict:
        """Answer *query* from BNS context retrieved out of ChromaDB.

        Never raises: every failure (invalid query, no matching sections,
        empty or malformed model reply, unexpected exception) is converted
        into a result dict with ``confidence`` set to ``"LOW"``.

        Returns:
            dict with keys ``answer``, ``references``, ``summary``,
            ``confidence``.
        """
        try:
            is_valid, error_message = self.validate_query(query)
            if not is_valid:
                return self._fallback(error_message, "Query validation failed")

            context, titles = self._retrieve_context(query)
            if not titles:
                return self._fallback(
                    "No relevant information found in the Bharateeya Nyaya Sanhita.",
                    "No matching content in BNS",
                )

            response = self.mistral_client.chat.completions.create(
                model="mistral-medium",
                messages=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {
                        "role": "user",
                        "content": self._USER_PROMPT_TEMPLATE.format(
                            context=context, query=query
                        ),
                    },
                ],
                temperature=0.3,
                max_tokens=1500,
                response_format={"type": "json_object"}
            )

            raw_reply = response.choices[0].message.content if response.choices else None
            if not raw_reply:
                return self._fallback(
                    "Error: No valid response received from the system",
                    "Could not generate summary due to system error",
                )
            return self._parse_model_reply(raw_reply, titles)
        except Exception as e:
            # Top-level boundary for the UI: report instead of crashing.
            logger.error(f"Error in get_response: {str(e)}")
            return self._fallback(
                f"Error: {str(e)}",
                "Could not generate summary due to system error",
            )

    def _fallback(self, answer: str, summary: str) -> dict:
        """Build a LOW-confidence result carrying the placeholder reference."""
        return {
            "answer": answer,
            "references": [self._NO_REFERENCES],
            "summary": summary,
            "confidence": "LOW"
        }

    def _retrieve_context(self, query: str) -> tuple[str, list]:
        """Fetch up to three sections and return ``(context_text, titles)``.

        ``titles`` is empty when the collection returned no documents.
        Each metadata entry is expected to carry a ``title`` key.
        """
        results = self.collection.query(query_texts=[query], n_results=3)
        # Guard against an empty/absent outer list as well as an empty hit list.
        documents = (results["documents"] or [[]])[0]
        if not documents:
            return "", []

        sections = []
        titles = []
        for doc, meta in zip(documents, results["metadatas"][0]):
            sections.append(f"{meta['title']}:\n{doc}")
            titles.append(meta["title"])
        return "\n\n".join(sections), titles

    def _parse_model_reply(self, raw_reply: str, known_titles: list) -> dict:
        """Turn the model's JSON text into the public result dict.

        References are kept only if every title the model cites is one of
        *known_titles*; otherwise the placeholder is used and confidence
        is forced to ``"LOW"``. Malformed replies yield a fallback result.
        """
        try:
            result = json.loads(raw_reply)
            if not isinstance(result, dict) or not all(
                field in result for field in self._REQUIRED_FIELDS
            ):
                raise ValueError("Missing required fields in response")

            cited = result["reference_sections"]
            if isinstance(cited, str):
                # A bare string would otherwise be iterated character by character.
                cited = [cited]
            elif not isinstance(cited, list):
                raise ValueError("reference_sections must be a list of section titles")

            confidence = result["confidence"]
            if confidence not in self._CONFIDENCE_LEVELS:
                confidence = "LOW"

            verified = [ref for ref in cited if ref in known_titles]
            if len(verified) != len(cited):
                # At least one cited title was not part of the supplied context.
                references = [self._NO_REFERENCES]
                confidence = "LOW"
            elif not verified:
                # Nothing cited: show the placeholder rather than a blank field.
                references = [self._NO_REFERENCES]
            else:
                references = [f"Bharateeya Nyaya Sanhita - {ref}" for ref in verified]

            return {
                "answer": self._with_prefix(
                    str(result["answer"]), self._ANSWER_PREFIX, " states that "
                ),
                "references": references,
                "summary": self._with_prefix(
                    str(result["summary"]), self._SUMMARY_PREFIX, ", "
                ),
                "confidence": confidence
            }
        except json.JSONDecodeError as e:
            logger.error(f"JSON parsing error: {str(e)}")
            return self._fallback(
                "Error: Unable to process the response format",
                "Could not generate summary due to processing error",
            )
        except ValueError as e:
            logger.error(f"Validation error: {str(e)}")
            return self._fallback(
                "Error: Response structure was invalid",
                "Could not generate summary due to validation error",
            )

    @staticmethod
    def _with_prefix(text: str, prefix: str, connector: str) -> str:
        """Return *text* guaranteed to start with *prefix*.

        The text is kept verbatim: lower-casing it would corrupt section
        names, acronyms and other proper nouns in the legal explanation.
        """
        if text.startswith(prefix):
            return text
        return f"{prefix}{connector}{text}"
# Build the single module-level assistant used by the UI callback.
# A failure is logged and re-raised so the app does not start without it.
try:
    assistant = LegalAssistant()
except Exception as init_error:
    logger.error(f"Failed to initialize BNS Assistant: {str(init_error)}")
    raise
def process_query(query: str) -> tuple:
    """Run *query* through the assistant and flatten the result for the UI.

    Returns a 4-tuple ``(answer, references, summary, confidence)`` where
    ``references`` is the list of reference strings joined with ``", "``.
    """
    result = assistant.get_response(query)
    answer = result["answer"]
    joined_references = ", ".join(result["references"])
    summary = result["summary"]
    confidence = result["confidence"]
    return answer, joined_references, summary, confidence
# Static Markdown rendered above the form (title, purpose, query guidelines).
_INTRO_MARKDOWN = """
# Bharateeya Nyaya Sanhita Assistant
## Your Guide to Understanding the BNS
This assistant helps you understand sections and provisions of the Bharateeya Nyaya Sanhita (BNS) in simple, clear language.
## Guidelines for Queries:
1. Ask specific questions about BNS sections or topics
2. End questions with a question mark
3. Keep queries between 10-500 characters
4. Example queries:
- "What does the BNS say about theft?"
- "Explain the provisions related to property offenses in BNS."
- "What are the sections dealing with criminal breach of trust?"
"""

# Static Markdown rendered below the result fields.
_NOTES_MARKDOWN = """
### Important Notes:
- All information is sourced directly from the Bharateeya Nyaya Sanhita
- Responses are based only on the official BNS document
- The assistant explains legal concepts in simple, understandable language
- Reliability level indicates how well your query matches BNS content
"""

# Assemble the Gradio UI. Components are created in display order:
# intro, query box, submit button, reliability level, detailed answer,
# then references and summary side by side, and finally the notes.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        query_input = gr.Textbox(
            label="Enter your query about Bharateeya Nyaya Sanhita",
            placeholder="e.g., What are the main provisions about theft in BNS?"
        )

    with gr.Row():
        submit_btn = gr.Button("Get BNS Information", variant="primary")

    with gr.Row():
        confidence_output = gr.Textbox(label="Information Reliability Level")

    with gr.Row():
        answer_output = gr.Textbox(label="Detailed Explanation", lines=5)

    with gr.Row():
        with gr.Column():
            references_output = gr.Textbox(label="BNS Section References", lines=2)
        with gr.Column():
            summary_output = gr.Textbox(label="Simple Summary", lines=2)

    gr.Markdown(_NOTES_MARKDOWN)

    # The output order must match the tuple returned by process_query:
    # (answer, references, summary, confidence).
    submit_btn.click(
        fn=process_query,
        inputs=[query_input],
        outputs=[answer_output, references_output, summary_output, confidence_output]
    )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()