Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,194 +1,215 @@
|
|
1 |
import gradio as gr
|
2 |
-
from typing import List, Dict
|
3 |
-
from langchain_huggingface import HuggingFacePipeline # Fixed import
|
4 |
-
from langchain_core.prompts import ChatPromptTemplate
|
5 |
-
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
6 |
import chromadb
|
7 |
-
from chromadb.utils import embedding_functions
|
8 |
-
import torch
|
9 |
import os
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
class
|
12 |
def __init__(self):
|
13 |
-
print("Initializing Legal Chatbot...")
|
14 |
-
|
15 |
# Initialize ChromaDB
|
16 |
self.chroma_client = chromadb.Client()
|
|
|
17 |
|
18 |
-
# Initialize
|
19 |
-
self.
|
20 |
-
|
21 |
-
|
22 |
)
|
23 |
|
24 |
-
#
|
25 |
-
self.
|
26 |
-
name="text_collection",
|
27 |
-
embedding_function=self.embedding_function,
|
28 |
-
metadata={"hnsw:space": "cosine"}
|
29 |
-
)
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
device="cpu"
|
40 |
-
)
|
41 |
-
self.llm = HuggingFacePipeline(pipeline=pipe)
|
42 |
-
|
43 |
-
# Create prompt template
|
44 |
-
self.template = """
|
45 |
-
IMPORTANT: You are a helpful assistant that provides information about the Bharatiya Nyaya Sanhita, 2023 based on the retrieved context.
|
46 |
-
|
47 |
-
STRICT RULES:
|
48 |
-
1. Base your response ONLY on the provided context
|
49 |
-
2. If you cannot find relevant information, respond with: "I apologize, but I cannot find information about that in the database."
|
50 |
-
3. Do not make assumptions or use external knowledge
|
51 |
-
4. Be concise and accurate in your responses
|
52 |
-
5. If quoting from the context, clearly indicate it
|
53 |
-
|
54 |
-
Context: {context}
|
55 |
-
|
56 |
-
Chat History: {chat_history}
|
57 |
-
|
58 |
-
Question: {question}
|
59 |
-
|
60 |
-
Answer:"""
|
61 |
-
|
62 |
-
self.prompt = ChatPromptTemplate.from_template(self.template)
|
63 |
-
self.chat_history = ""
|
64 |
-
self.initialized = False
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
# Add documents in batches
|
90 |
-
batch_size = 50
|
91 |
-
for i in range(0, len(chunks), batch_size):
|
92 |
-
batch = chunks[i:i + batch_size]
|
93 |
-
batch_ids = [f"doc_{j}" for j in range(i, i + len(batch))]
|
94 |
-
batch_metadata = [{
|
95 |
-
"index": index_lines[j].strip() if j < len(index_lines) else f"Chunk {j+1}",
|
96 |
-
"chunk_number": j
|
97 |
-
} for j in range(i, i + len(batch))]
|
98 |
-
|
99 |
-
self.collection.add(
|
100 |
-
documents=batch,
|
101 |
-
ids=batch_ids,
|
102 |
-
metadatas=batch_metadata
|
103 |
-
)
|
104 |
-
|
105 |
-
self.initialized = True
|
106 |
-
return True
|
107 |
-
|
108 |
-
except Exception as e:
|
109 |
-
print(f"Error initializing database: {str(e)}")
|
110 |
-
return False
|
111 |
|
112 |
-
def
|
113 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
try:
|
115 |
results = self.collection.query(
|
116 |
query_texts=[query],
|
117 |
-
n_results=3
|
118 |
-
include=["documents", "metadatas", "distances"]
|
119 |
)
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
for doc, meta, dist in zip(
|
128 |
-
results['documents'][0],
|
129 |
-
results['metadatas'][0],
|
130 |
-
results['distances'][0]
|
131 |
-
)
|
132 |
-
]
|
133 |
except Exception as e:
|
134 |
-
print(f"
|
135 |
-
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
-
def chat(self, query: str, history) -> str:
|
138 |
-
"""Process a query and return a response"""
|
139 |
try:
|
140 |
-
#
|
141 |
-
|
142 |
-
return "Error: Unable to initialize the database. Please try again."
|
143 |
-
|
144 |
-
# Search for relevant content
|
145 |
-
search_results = self._search_database(query)
|
146 |
-
|
147 |
-
if not search_results:
|
148 |
-
return "I apologize, but I cannot find information about that in the database."
|
149 |
|
150 |
-
#
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
])
|
155 |
|
156 |
-
#
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
-
#
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
-
return
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
except Exception as e:
|
170 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
-
#
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
-
# Create the Gradio interface
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
-
# Launch the
|
190 |
-
|
191 |
-
iface.launch(
|
192 |
-
share=False,
|
193 |
-
show_error=True
|
194 |
-
)
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
2 |
import chromadb
|
|
|
|
|
3 |
import os
|
4 |
+
from openai import OpenAI
|
5 |
+
import json
|
6 |
+
from typing import List, Dict
|
7 |
+
import re
|
8 |
|
9 |
+
class LegalAssistant:
    """Answer questions about Indian law using a Mistral chat model.

    For each query the assistant validates the text, looks up related
    passages in a ChromaDB collection, sends the question (plus any retrieved
    context) to the Mistral chat-completions endpoint, and normalizes the
    model's JSON reply into a dict with the keys ``answer``, ``references``,
    ``summary`` and ``confidence``.
    """

    def __init__(self):
        """Create the ChromaDB collection, the Mistral client and the system prompt."""
        # Initialize ChromaDB
        # NOTE(review): nothing visible in this file adds documents to the
        # "legal_documents" collection; unless it is populated elsewhere, every
        # search returns no context and the model sees only the question.
        self.chroma_client = chromadb.Client()
        self.collection = self.chroma_client.get_or_create_collection("legal_documents")

        # Initialize Mistral AI client (Mistral is reached through the
        # OpenAI-compatible client by overriding base_url).
        # The key comes from the environment only: a key embedded in source is
        # readable by anyone who can see the repository. Any key that was
        # previously committed should be treated as compromised and rotated.
        api_key = os.environ.get("MISTRAL_API_KEY")
        if api_key:
            self.mistral_client = OpenAI(
                api_key=api_key,
                base_url="https://api.mistral.ai/v1",
            )
        else:
            # Keep the app usable enough to show a clear error in the UI
            # instead of crashing at import time.
            print("MISTRAL_API_KEY is not set; queries will return a configuration error.")
            self.mistral_client = None

        # Define system prompt with strict rules
        self.system_prompt = """You are a specialized legal assistant trained on Indian law. You MUST follow these strict rules:

RESPONSE FORMAT RULES:
1. ALWAYS structure your response in this exact JSON format:
{
"answer": "Your detailed answer here",
"reference_sections": ["Section X of Act Y", ...],
"summary": "2-3 line summary",
"confidence": "HIGH/MEDIUM/LOW"
}

CONTENT RULES:
1. NEVER make assumptions or provide information not supported by Indian law
2. ALWAYS cite specific sections, acts, and legal precedents
3. If information is insufficient, explicitly state "Insufficient information" in answer
4. NEVER provide legal advice, only legal information
5. For any constitutional matters, ALWAYS cite relevant Articles

ACCURACY RULES:
1. If confidence is less than 80%, mark as LOW confidence
2. If multiple interpretations exist, list ALL with citations
3. If law has been amended, specify the latest amendment date
4. For case law, cite the full case reference

PROHIBITED:
1. NO personal opinions
2. NO hypothetical scenarios
3. NO interpretation of ongoing cases
4. NO advice on specific legal situations

ERROR HANDLING:
1. If query is unclear: Request clarification
2. If outside Indian law scope: State "Outside scope of Indian law"
3. If conflicting laws exist: List all applicable laws"""

    @staticmethod
    def _build_response(answer, summary, references=None, confidence="LOW") -> Dict:
        """Return a response dict in the shape every caller of get_response expects."""
        return {
            "answer": answer,
            "references": list(references) if references else [],
            "summary": summary,
            "confidence": confidence,
        }

    @staticmethod
    def _parse_model_json(raw):
        """Extract the JSON object from the model's reply.

        Chat models frequently wrap JSON in Markdown code fences or add a
        sentence before/after it, so after trying the text as-is this also
        tries the span from the first ``{`` to the last ``}``.

        Returns:
            The decoded dict, or ``None`` when *raw* is not a string or no
            JSON object can be decoded from it.
        """
        if not isinstance(raw, str):
            return None

        text = raw.strip()
        candidates = [text]
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            candidates.append(text[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # A bare list/string/number is valid JSON but not the agreed format.
            if isinstance(parsed, dict):
                return parsed
        return None

    def validate_query(self, query: str) -> tuple[bool, str]:
        """Validate the input query.

        Surrounding whitespace is ignored for every check, so a query that
        merely has a trailing space or newline is not rejected.

        Returns:
            ``(True, "")`` when the query is acceptable, otherwise
            ``(False, reason)`` with a user-facing explanation.
        """
        text = query.strip() if query else ""
        if len(text) < 10:
            return False, "Query too short. Please provide more details."
        if len(text) > 500:
            return False, "Query too long. Please be more concise."
        if not text.endswith(("?", ".")):
            return False, "Query must end with a question mark or period."
        return True, ""

    def _search_documents(self, query: str) -> tuple[str, List[str]]:
        """Search ChromaDB for relevant documents.

        Returns:
            ``(context, sources)`` where *context* is the retrieved documents
            joined by blank lines and *sources* holds the ``source`` metadata
            value of each hit (``"Unknown"`` when absent). Returns
            ``("", [])`` when nothing is found or the search fails.
        """
        try:
            results = self.collection.query(
                query_texts=[query],
                n_results=3,
            )

            if not results or not results.get("documents"):
                return "", []

            documents = results["documents"][0]
            metadatas = (results.get("metadatas") or [[]])[0] or []
            # A hit stored without metadata comes back as None; treating it as
            # an empty dict keeps the documents instead of failing the search.
            sources = [str((meta or {}).get("source", "Unknown")) for meta in metadatas]
            return "\n\n".join(documents), sources
        except Exception as e:
            # Retrieval is best-effort: the question can still be answered
            # without context, so report the problem and carry on.
            print(f"Search error: {str(e)}")
            return "", []

    def get_response(self, query: str) -> Dict:
        """Get response from Mistral AI with context from ChromaDB.

        Returns:
            A dict with the keys ``answer`` (str), ``references`` (list of
            str), ``summary`` (str) and ``confidence`` (str). Failures are
            reported through the same shape with ``confidence`` set to
            ``"LOW"``; this method does not raise.
        """
        # Validate query
        is_valid, error_message = self.validate_query(query)
        if not is_valid:
            return self._build_response(error_message, "Invalid query")

        if getattr(self, "mistral_client", None) is None:
            return self._build_response(
                "Error: MISTRAL_API_KEY is not configured",
                "System error occurred",
            )

        query = query.strip()

        try:
            # Get relevant context from ChromaDB
            context, sources = self._search_documents(query)

            # Prepare content: only mention context when some was retrieved.
            if context:
                content = (
                    f"Context: {context}\n"
                    f"Sources: {', '.join(sources)}\n"
                    f"Question: {query}"
                )
            else:
                content = query

            # Get response from Mistral AI
            response = self.mistral_client.chat.completions.create(
                model="mistral-medium",
                messages=[
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": content},
                ],
                temperature=0.1,
                max_tokens=1000,
            )

            if not response.choices:
                return self._build_response(
                    "No response received",
                    "Response generation failed",
                )

            # Parse response
            result = self._parse_model_json(response.choices[0].message.content)
            if result is None:
                return self._build_response(
                    "Error: Response format invalid",
                    "Response parsing failed",
                )

            # The model may return a single string (or something else) where a
            # list was requested; normalize so callers can always join it.
            references = result.get("reference_sections", [])
            if isinstance(references, str):
                references = [references]
            elif not isinstance(references, (list, tuple)):
                references = []

            return self._build_response(
                result.get("answer", "No answer provided"),
                result.get("summary", ""),
                references=[str(ref) for ref in references],
                confidence=result.get("confidence", "LOW"),
            )

        except Exception as e:
            # Top-level boundary for the UI: surface the failure as a normal
            # response instead of letting the request crash.
            return self._build_response(f"Error: {str(e)}", "System error occurred")
|
155 |
+
|
156 |
+
# Initialize the assistant
|
157 |
+
# Single module-level instance; process_query calls its get_response for
# each submitted query.
assistant = LegalAssistant()
|
158 |
|
159 |
+
# Create Gradio interface
|
160 |
+
def process_query(query: str) -> tuple:
    """Run *query* through the assistant and format the result for the UI.

    Returns:
        A 4-tuple ``(answer, references, summary, confidence)`` in the order
        of the output components wired to the submit button.
    """
    response = assistant.get_response(query)

    references = response["references"]
    # The references originate from model output: a bare string would be
    # joined character by character and non-string items would make
    # str.join raise, so normalize before formatting.
    if isinstance(references, str):
        references = [references]
    if references:
        references_text = ", ".join(str(ref) for ref in references)
    else:
        references_text = "No specific references"

    return (
        response["answer"],
        references_text,
        response["summary"] if response["summary"] else "No summary available",
        response["confidence"],
    )
|
168 |
|
169 |
+
# Create the Gradio interface with a professional theme
|
170 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Usage guidelines. Item 2 mirrors the query validator, which accepts a
    # trailing question mark or period.
    gr.Markdown("""
    # Indian Legal Assistant
    ## Guidelines for Queries:
    1. Be specific and clear in your questions
    2. End your query with a question mark or a period
    3. Provide relevant context if available
    4. Keep queries between 10-500 characters
    """)

    with gr.Row():
        query_input = gr.Textbox(
            label="Enter your legal query",
            placeholder="e.g., What is the legal age for marriage in India as per current laws?"
        )

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")

    with gr.Row():
        confidence_output = gr.Textbox(label="Confidence Level")

    with gr.Row():
        answer_output = gr.Textbox(label="Answer", lines=5)

    with gr.Row():
        with gr.Column():
            references_output = gr.Textbox(label="Legal References", lines=3)
        with gr.Column():
            summary_output = gr.Textbox(label="Summary", lines=2)

    gr.Markdown("""
    ### Important Notes:
    - This assistant provides legal information, not legal advice
    - Always verify information with a qualified legal professional
    - Information is based on Indian law only
    """)

    # The outputs list must stay in the order of the tuple returned by
    # process_query: answer, references, summary, confidence.
    submit_btn.click(
        fn=process_query,
        inputs=[query_input],
        outputs=[answer_output, references_output, summary_output, confidence_output]
    )

# Launch the app
demo.launch()
|
|
|
|
|
|
|
|