veerukhannan committed on
Commit
3564039
·
verified ·
1 Parent(s): 859da87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -65
app.py CHANGED
@@ -19,13 +19,13 @@ class LegalAssistant:
19
  def __init__(self):
20
  try:
21
  # Initialize and verify ChromaDB content
22
- logger.info("Initializing LegalAssistant...")
23
 
24
  # Try to verify content, if fails, try to initialize
25
  if not test_chromadb_content():
26
  logger.warning("ChromaDB verification failed, attempting to initialize...")
27
  if not initialize_chromadb():
28
- raise ValueError("Failed to initialize ChromaDB")
29
 
30
  # Initialize ChromaDB client
31
  base_path = os.path.dirname(os.path.abspath(__file__))
@@ -40,7 +40,7 @@ class LegalAssistant:
40
  embedding_function=self.embedding_function
41
  )
42
 
43
- logger.info(f"Collection loaded with {self.collection.count()} documents")
44
 
45
  # Initialize Mistral AI client
46
  self.mistral_client = OpenAI(
@@ -48,18 +48,18 @@ class LegalAssistant:
48
  base_url="https://api.mistral.ai/v1"
49
  )
50
 
51
- logger.info("LegalAssistant initialized successfully")
52
 
53
  except Exception as e:
54
- logger.error(f"Error initializing LegalAssistant: {str(e)}")
55
  raise
56
 
57
  def validate_query(self, query: str) -> tuple[bool, str]:
58
  """Validate the input query"""
59
  if not query or len(query.strip()) < 10:
60
- return False, "Query too short. Please provide more details (minimum 10 characters)."
61
  if len(query) > 500:
62
- return False, "Query too long. Please be more concise (maximum 500 characters)."
63
  return True, ""
64
 
65
  def get_response(self, query: str) -> dict:
@@ -70,8 +70,8 @@ class LegalAssistant:
70
  if not is_valid:
71
  return {
72
  "answer": error_message,
73
- "references": [],
74
- "summary": "Invalid query",
75
  "confidence": "LOW"
76
  }
77
 
@@ -83,9 +83,9 @@ class LegalAssistant:
83
 
84
  if not results['documents'][0]:
85
  return {
86
- "answer": "No relevant information found in the document.",
87
- "references": [],
88
- "summary": "No matching content",
89
  "confidence": "LOW"
90
  }
91
 
@@ -100,43 +100,52 @@ class LegalAssistant:
100
  context = "\n\n".join(context_parts)
101
 
102
  # Prepare system prompt with explicit JSON format
103
- system_prompt = '''You are a specialized legal assistant that MUST follow these STRICT rules:
104
 
105
  1. You MUST ONLY use information from the provided context.
106
  2. DO NOT use any external knowledge about laws, IPC, Constitution, or legal matters.
107
  3. Your response MUST be in this EXACT JSON format:
108
  {
109
- "answer": "Your detailed answer using ONLY information from the context",
110
- "reference_sections": ["List of section titles used from context"],
111
- "summary": "Brief 2-3 line summary",
112
  "confidence": "HIGH/MEDIUM/LOW"
113
  }
114
 
115
  Confidence Level Rules:
116
- - HIGH: When exact information is found in context
117
- - MEDIUM: When partial or indirect information is found
118
- - LOW: When information is unclear or not found
 
 
 
 
 
 
 
119
 
120
  If information is not in context, respond with:
121
  {
122
- "answer": "This information is not present in the provided document.",
123
  "reference_sections": [],
124
- "summary": "Information not found in document",
125
  "confidence": "LOW"
126
  }'''
127
 
128
  # Prepare user content
129
- content = f'''Context Sections:
130
  {context}
131
 
132
  Question: {query}
133
 
134
  IMPORTANT:
135
- 1. Use ONLY the information from the above context
136
- 2. Format your response as a valid JSON object with the exact structure shown above
137
- 3. Include ONLY section titles that exist in the context
138
- 4. DO NOT add any text outside the JSON structure
139
- 5. Ensure the JSON is properly formatted with double quotes'''
 
 
140
 
141
  # Get response from Mistral AI
142
  response = self.mistral_client.chat.completions.create(
@@ -145,8 +154,8 @@ IMPORTANT:
145
  {"role": "system", "content": system_prompt},
146
  {"role": "user", "content": content}
147
  ],
148
- temperature=0.1,
149
- max_tokens=1000,
150
  response_format={ "type": "json_object" }
151
  )
152
 
@@ -168,43 +177,52 @@ IMPORTANT:
168
  valid_references = [ref for ref in result["reference_sections"]
169
  if ref in references]
170
 
 
 
 
 
171
  # If references don't match, adjust confidence
172
  if len(valid_references) != len(result["reference_sections"]):
173
- result["reference_sections"] = valid_references
174
  result["confidence"] = "LOW"
175
 
176
- # Ensure answer and summary are strings
177
- result["answer"] = str(result["answer"])
178
- result["summary"] = str(result["summary"])
 
 
 
 
 
179
 
180
  return {
181
- "answer": result["answer"],
182
- "references": valid_references,
183
- "summary": result["summary"],
184
  "confidence": result["confidence"]
185
  }
186
 
187
  except json.JSONDecodeError as e:
188
  logger.error(f"JSON parsing error: {str(e)}")
189
  return {
190
- "answer": "Error: Failed to parse response format",
191
- "references": [],
192
- "summary": "Response format error",
193
  "confidence": "LOW"
194
  }
195
  except ValueError as e:
196
  logger.error(f"Validation error: {str(e)}")
197
  return {
198
- "answer": "Error: Invalid response structure",
199
- "references": [],
200
- "summary": "Response validation error",
201
  "confidence": "LOW"
202
  }
203
 
204
  return {
205
- "answer": "Error: No valid response received",
206
- "references": [],
207
- "summary": "No response generated",
208
  "confidence": "LOW"
209
  }
210
 
@@ -212,8 +230,8 @@ IMPORTANT:
212
  logger.error(f"Error in get_response: {str(e)}")
213
  return {
214
  "answer": f"Error: {str(e)}",
215
- "references": [],
216
- "summary": "System error occurred",
217
  "confidence": "LOW"
218
  }
219
 
@@ -221,7 +239,7 @@ IMPORTANT:
221
  try:
222
  assistant = LegalAssistant()
223
  except Exception as e:
224
- logger.error(f"Failed to initialize LegalAssistant: {str(e)}")
225
  raise
226
 
227
  def process_query(query: str) -> tuple:
@@ -229,49 +247,65 @@ def process_query(query: str) -> tuple:
229
  response = assistant.get_response(query)
230
  return (
231
  response["answer"],
232
- ", ".join(response["references"]) if response["references"] else "No specific references",
233
- response["summary"] if response["summary"] else "No summary available",
234
  response["confidence"]
235
  )
236
 
237
  # Create the Gradio interface
238
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
239
  gr.Markdown("""
240
- # Indian Legal Assistant
 
 
 
 
241
  ## Guidelines for Queries:
242
- 1. Be specific and clear in your questions
243
- 2. End questions with a question mark or period
244
  3. Keep queries between 10-500 characters
245
- 4. Questions will be answered based ONLY on the provided legal document
 
 
 
246
  """)
247
 
248
  with gr.Row():
249
  query_input = gr.Textbox(
250
- label="Enter your legal query",
251
- placeholder="e.g., What are the main provisions in this document?"
252
  )
253
 
254
  with gr.Row():
255
- submit_btn = gr.Button("Submit", variant="primary")
256
 
257
  with gr.Row():
258
- confidence_output = gr.Textbox(label="Confidence Level")
259
 
260
  with gr.Row():
261
- answer_output = gr.Textbox(label="Answer", lines=5)
 
 
 
262
 
263
  with gr.Row():
264
  with gr.Column():
265
- references_output = gr.Textbox(label="Document References", lines=2)
 
 
 
266
  with gr.Column():
267
- summary_output = gr.Textbox(label="Summary", lines=2)
 
 
 
268
 
269
  gr.Markdown("""
270
  ### Important Notes:
271
- - Responses are based ONLY on the provided document
272
- - No external legal knowledge is used
273
- - All references are from the document itself
274
- - Confidence levels indicate how well the answer matches the document content
275
  """)
276
 
277
  submit_btn.click(
 
19
  def __init__(self):
20
  try:
21
  # Initialize and verify ChromaDB content
22
+ logger.info("Initializing Bharateeya Nyaya Sanhita Assistant...")
23
 
24
  # Try to verify content, if fails, try to initialize
25
  if not test_chromadb_content():
26
  logger.warning("ChromaDB verification failed, attempting to initialize...")
27
  if not initialize_chromadb():
28
+ raise ValueError("Failed to initialize ChromaDB with BNS content")
29
 
30
  # Initialize ChromaDB client
31
  base_path = os.path.dirname(os.path.abspath(__file__))
 
40
  embedding_function=self.embedding_function
41
  )
42
 
43
+ logger.info(f"BNS Collection loaded with {self.collection.count()} sections")
44
 
45
  # Initialize Mistral AI client
46
  self.mistral_client = OpenAI(
 
48
  base_url="https://api.mistral.ai/v1"
49
  )
50
 
51
+ logger.info("BNS Assistant initialized successfully")
52
 
53
  except Exception as e:
54
+ logger.error(f"Error initializing BNS Assistant: {str(e)}")
55
  raise
56
 
57
  def validate_query(self, query: str) -> tuple[bool, str]:
58
  """Validate the input query"""
59
  if not query or len(query.strip()) < 10:
60
+ return False, "Please provide a more detailed question about the Bharateeya Nyaya Sanhita (minimum 10 characters)."
61
  if len(query) > 500:
62
+ return False, "Please make your question more concise (maximum 500 characters)."
63
  return True, ""
64
 
65
  def get_response(self, query: str) -> dict:
 
70
  if not is_valid:
71
  return {
72
  "answer": error_message,
73
+ "references": ["No specific references from Bharateeya Nyaya Sanhita"],
74
+ "summary": "Query validation failed",
75
  "confidence": "LOW"
76
  }
77
 
 
83
 
84
  if not results['documents'][0]:
85
  return {
86
+ "answer": "No relevant information found in the Bharateeya Nyaya Sanhita.",
87
+ "references": ["No specific references from Bharateeya Nyaya Sanhita"],
88
+ "summary": "No matching content in BNS",
89
  "confidence": "LOW"
90
  }
91
 
 
100
  context = "\n\n".join(context_parts)
101
 
102
  # Prepare system prompt with explicit JSON format
103
+ system_prompt = '''You are a specialized legal assistant for the Bharateeya Nyaya Sanhita (BNS) that MUST follow these STRICT rules:
104
 
105
  1. You MUST ONLY use information from the provided context.
106
  2. DO NOT use any external knowledge about laws, IPC, Constitution, or legal matters.
107
  3. Your response MUST be in this EXACT JSON format:
108
  {
109
+ "answer": "Your detailed answer explaining BNS sections in simple, easy-to-understand language. Start with 'The Bharateeya Nyaya Sanhita...'",
110
+ "reference_sections": ["List of relevant BNS section titles"],
111
+ "summary": "Provide a user-friendly summary that explains:\n1. What BNS sections were found\n2. What each section covers\n3. How these sections relate to the query\nStart with 'In the Bharateeya Nyaya Sanhita...'",
112
  "confidence": "HIGH/MEDIUM/LOW"
113
  }
114
 
115
  Confidence Level Rules:
116
+ - HIGH: When exact matching BNS sections and their details are found
117
+ - MEDIUM: When partially relevant BNS sections are found
118
+ - LOW: When sections are not clearly relevant or not found
119
+
120
+ Response Guidelines:
121
+ 1. Always mention "Bharateeya Nyaya Sanhita" when referencing sections
122
+ 2. Explain legal terms in simple language
123
+ 3. Make the summary easy to understand for non-legal persons
124
+ 4. Break down complex legal concepts into simple explanations
125
+ 5. Use everyday examples where appropriate
126
 
127
  If information is not in context, respond with:
128
  {
129
+ "answer": "The Bharateeya Nyaya Sanhita sections related to your query are not present in the provided document.",
130
  "reference_sections": [],
131
+ "summary": "No relevant sections found in the Bharateeya Nyaya Sanhita document",
132
  "confidence": "LOW"
133
  }'''
134
 
135
  # Prepare user content
136
+ content = f'''Context Sections from Bharateeya Nyaya Sanhita:
137
  {context}
138
 
139
  Question: {query}
140
 
141
  IMPORTANT:
142
+ 1. Use ONLY the information from the above BNS context
143
+ 2. Format your response as a valid JSON object
144
+ 3. Always reference "Bharateeya Nyaya Sanhita" in your response
145
+ 4. Explain each section in simple, user-friendly language
146
+ 5. Make the summary comprehensive but easy to understand
147
+ 6. Break down legal concepts for non-legal persons
148
+ 7. Ensure proper JSON formatting with double quotes'''
149
 
150
  # Get response from Mistral AI
151
  response = self.mistral_client.chat.completions.create(
 
154
  {"role": "system", "content": system_prompt},
155
  {"role": "user", "content": content}
156
  ],
157
+ temperature=0.3,
158
+ max_tokens=1500,
159
  response_format={ "type": "json_object" }
160
  )
161
 
 
177
  valid_references = [ref for ref in result["reference_sections"]
178
  if ref in references]
179
 
180
+ # Format references to include BNS
181
+ formatted_references = [f"Bharateeya Nyaya Sanhita - {ref}"
182
+ for ref in valid_references]
183
+
184
  # If references don't match, adjust confidence
185
  if len(valid_references) != len(result["reference_sections"]):
186
+ formatted_references = ["No specific references from Bharateeya Nyaya Sanhita"]
187
  result["confidence"] = "LOW"
188
 
189
+ # Ensure answer and summary are properly formatted
190
+ answer = str(result["answer"])
191
+ if not answer.startswith("The Bharateeya Nyaya Sanhita"):
192
+ answer = f"The Bharateeya Nyaya Sanhita states that {answer.lower()}"
193
+
194
+ summary = str(result["summary"])
195
+ if not summary.startswith("In the Bharateeya Nyaya Sanhita"):
196
+ summary = f"In the Bharateeya Nyaya Sanhita, {summary.lower()}"
197
 
198
  return {
199
+ "answer": answer,
200
+ "references": formatted_references,
201
+ "summary": summary,
202
  "confidence": result["confidence"]
203
  }
204
 
205
  except json.JSONDecodeError as e:
206
  logger.error(f"JSON parsing error: {str(e)}")
207
  return {
208
+ "answer": "Error: Unable to process the response format",
209
+ "references": ["No specific references from Bharateeya Nyaya Sanhita"],
210
+ "summary": "Could not generate summary due to processing error",
211
  "confidence": "LOW"
212
  }
213
  except ValueError as e:
214
  logger.error(f"Validation error: {str(e)}")
215
  return {
216
+ "answer": "Error: Response structure was invalid",
217
+ "references": ["No specific references from Bharateeya Nyaya Sanhita"],
218
+ "summary": "Could not generate summary due to validation error",
219
  "confidence": "LOW"
220
  }
221
 
222
  return {
223
+ "answer": "Error: No valid response received from the system",
224
+ "references": ["No specific references from Bharateeya Nyaya Sanhita"],
225
+ "summary": "Could not generate summary due to system error",
226
  "confidence": "LOW"
227
  }
228
 
 
230
  logger.error(f"Error in get_response: {str(e)}")
231
  return {
232
  "answer": f"Error: {str(e)}",
233
+ "references": ["No specific references from Bharateeya Nyaya Sanhita"],
234
+ "summary": "Could not generate summary due to system error",
235
  "confidence": "LOW"
236
  }
237
 
 
239
  try:
240
  assistant = LegalAssistant()
241
  except Exception as e:
242
+ logger.error(f"Failed to initialize BNS Assistant: {str(e)}")
243
  raise
244
 
245
  def process_query(query: str) -> tuple:
 
247
  response = assistant.get_response(query)
248
  return (
249
  response["answer"],
250
+ ", ".join(response["references"]),
251
+ response["summary"],
252
  response["confidence"]
253
  )
254
 
255
  # Create the Gradio interface
256
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
257
  gr.Markdown("""
258
+ # Bharateeya Nyaya Sanhita Assistant
259
+ ## Your Guide to Understanding the BNS
260
+
261
+ This assistant helps you understand sections and provisions of the Bharateeya Nyaya Sanhita (BNS) in simple, clear language.
262
+
263
  ## Guidelines for Queries:
264
+ 1. Ask specific questions about BNS sections or topics
265
+ 2. End questions with a question mark
266
  3. Keep queries between 10-500 characters
267
+ 4. Example queries:
268
+ - "What does the BNS say about theft?"
269
+ - "Explain the provisions related to property offenses in BNS."
270
+ - "What are the sections dealing with criminal breach of trust?"
271
  """)
272
 
273
  with gr.Row():
274
  query_input = gr.Textbox(
275
+ label="Enter your query about Bharateeya Nyaya Sanhita",
276
+ placeholder="e.g., What are the main provisions about theft in BNS?"
277
  )
278
 
279
  with gr.Row():
280
+ submit_btn = gr.Button("Get BNS Information", variant="primary")
281
 
282
  with gr.Row():
283
+ confidence_output = gr.Textbox(label="Information Reliability Level")
284
 
285
  with gr.Row():
286
+ answer_output = gr.Textbox(
287
+ label="Detailed Explanation",
288
+ lines=5
289
+ )
290
 
291
  with gr.Row():
292
  with gr.Column():
293
+ references_output = gr.Textbox(
294
+ label="BNS Section References",
295
+ lines=2
296
+ )
297
  with gr.Column():
298
+ summary_output = gr.Textbox(
299
+ label="Simple Summary",
300
+ lines=2
301
+ )
302
 
303
  gr.Markdown("""
304
  ### Important Notes:
305
+ - All information is sourced directly from the Bharateeya Nyaya Sanhita
306
+ - Responses are based only on the official BNS document
307
+ - The assistant explains legal concepts in simple, understandable language
308
+ - Reliability level indicates how well your query matches BNS content
309
  """)
310
 
311
  submit_btn.click(