veerukhannan committed on
Commit
859da87
·
verified ·
1 Parent(s): 4ed9501

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -50
app.py CHANGED
@@ -5,7 +5,7 @@ from openai import OpenAI
5
  import json
6
  from sentence_transformers import SentenceTransformer
7
  from loguru import logger
8
- from test_embeddings import test_chromadb_content
9
 
10
  class SentenceTransformerEmbeddings:
11
  def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
@@ -20,7 +20,6 @@ class LegalAssistant:
20
  try:
21
  # Initialize and verify ChromaDB content
22
  logger.info("Initializing LegalAssistant...")
23
- from test_embeddings import test_chromadb_content, initialize_chromadb
24
 
25
  # Try to verify content, if fails, try to initialize
26
  if not test_chromadb_content():
@@ -96,43 +95,48 @@ class LegalAssistant:
96
 
97
  for doc, meta in zip(results['documents'][0], results['metadatas'][0]):
98
  context_parts.append(f"{meta['title']}:\n{doc}")
99
- references.append(f"{meta['title']} (Section {meta['section_number']})")
100
 
101
  context = "\n\n".join(context_parts)
102
 
103
- # Prepare content for Mistral AI
104
- system_prompt = """You are a specialized legal assistant that MUST follow these STRICT rules:
105
 
106
- CRITICAL RULE:
107
- YOU MUST ONLY USE INFORMATION FROM THE PROVIDED CONTEXT. DO NOT USE ANY EXTERNAL KNOWLEDGE.
 
 
 
 
 
 
 
108
 
109
- RESPONSE FORMAT RULES:
110
- 1. ALWAYS structure your response in this exact JSON format:
111
- {
112
- "answer": "Your detailed answer here using ONLY information from the provided context",
113
- "reference_sections": ["Exact section titles from the context"],
114
- "summary": "2-3 line summary using ONLY information from context",
115
- "confidence": "HIGH/MEDIUM/LOW based on context match"
116
- }
117
 
118
- STRICT CONTENT RULES:
119
- 1. NEVER mention or reference any laws not present in the context
120
- 2. If the information is not in the context, respond with LOW confidence
121
- 3. ONLY cite sections that are explicitly present in the provided context
122
- 4. DO NOT make assumptions or inferences beyond the context
123
- 5. DO NOT combine information from external knowledge"""
 
124
 
125
- content = f"""IMPORTANT: ONLY use information from the following context to answer the question.
126
-
127
- Context Sections:
128
  {context}
129
 
130
- Available Document Sections:
131
- {', '.join(references)}
132
-
133
  Question: {query}
134
 
135
- Remember: ONLY use information from the above context."""
 
 
 
 
 
136
 
137
  # Get response from Mistral AI
138
  response = self.mistral_client.chat.completions.create(
@@ -142,7 +146,8 @@ Remember: ONLY use information from the above context."""
142
  {"role": "user", "content": content}
143
  ],
144
  temperature=0.1,
145
- max_tokens=1000
 
146
  )
147
 
148
  # Parse and validate response
@@ -150,39 +155,56 @@ Remember: ONLY use information from the above context."""
150
  try:
151
  result = json.loads(response.choices[0].message.content)
152
 
153
- # Validate references
154
- valid_references = [ref for ref in result.get("reference_sections", [])
155
- if any(source.split(" (Section")[0] in ref for source in references)]
 
 
 
 
 
156
 
157
- if len(valid_references) != len(result.get("reference_sections", [])):
158
- logger.warning("Response contained unauthorized references")
159
- return {
160
- "answer": "Error: Response contained unauthorized references",
161
- "references": [],
162
- "summary": "Invalid response generated",
163
- "confidence": "LOW"
164
- }
 
 
 
 
165
 
166
  return {
167
- "answer": result.get("answer", "No answer provided"),
168
  "references": valid_references,
169
- "summary": result.get("summary", ""),
170
- "confidence": result.get("confidence", "LOW")
171
  }
172
 
173
- except json.JSONDecodeError:
174
- logger.error("Failed to parse response JSON")
 
 
 
 
 
 
 
 
175
  return {
176
- "answer": "Error: Invalid response format",
177
  "references": [],
178
- "summary": "Response parsing failed",
179
  "confidence": "LOW"
180
  }
181
 
182
  return {
183
- "answer": "No valid response received",
184
  "references": [],
185
- "summary": "Response generation failed",
186
  "confidence": "LOW"
187
  }
188
 
@@ -240,10 +262,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
240
 
241
  with gr.Row():
242
  with gr.Column():
243
- references_output = gr.Textbox(label="Document References", lines=3)
244
  with gr.Column():
245
  summary_output = gr.Textbox(label="Summary", lines=2)
246
 
 
 
 
 
 
 
 
 
247
  submit_btn.click(
248
  fn=process_query,
249
  inputs=[query_input],
 
5
  import json
6
  from sentence_transformers import SentenceTransformer
7
  from loguru import logger
8
+ from test_embeddings import test_chromadb_content, initialize_chromadb
9
 
10
  class SentenceTransformerEmbeddings:
11
  def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
 
20
  try:
21
  # Initialize and verify ChromaDB content
22
  logger.info("Initializing LegalAssistant...")
 
23
 
24
  # Try to verify content, if fails, try to initialize
25
  if not test_chromadb_content():
 
95
 
96
  for doc, meta in zip(results['documents'][0], results['metadatas'][0]):
97
  context_parts.append(f"{meta['title']}:\n{doc}")
98
+ references.append(meta['title'])
99
 
100
  context = "\n\n".join(context_parts)
101
 
102
+ # Prepare system prompt with explicit JSON format
103
+ system_prompt = '''You are a specialized legal assistant that MUST follow these STRICT rules:
104
 
105
+ 1. You MUST ONLY use information from the provided context.
106
+ 2. DO NOT use any external knowledge about laws, IPC, Constitution, or legal matters.
107
+ 3. Your response MUST be in this EXACT JSON format:
108
+ {
109
+ "answer": "Your detailed answer using ONLY information from the context",
110
+ "reference_sections": ["List of section titles used from context"],
111
+ "summary": "Brief 2-3 line summary",
112
+ "confidence": "HIGH/MEDIUM/LOW"
113
+ }
114
 
115
+ Confidence Level Rules:
116
+ - HIGH: When exact information is found in context
117
+ - MEDIUM: When partial or indirect information is found
118
+ - LOW: When information is unclear or not found
 
 
 
 
119
 
120
+ If information is not in context, respond with:
121
+ {
122
+ "answer": "This information is not present in the provided document.",
123
+ "reference_sections": [],
124
+ "summary": "Information not found in document",
125
+ "confidence": "LOW"
126
+ }'''
127
 
128
+ # Prepare user content
129
+ content = f'''Context Sections:
 
130
  {context}
131
 
 
 
 
132
  Question: {query}
133
 
134
+ IMPORTANT:
135
+ 1. Use ONLY the information from the above context
136
+ 2. Format your response as a valid JSON object with the exact structure shown above
137
+ 3. Include ONLY section titles that exist in the context
138
+ 4. DO NOT add any text outside the JSON structure
139
+ 5. Ensure the JSON is properly formatted with double quotes'''
140
 
141
  # Get response from Mistral AI
142
  response = self.mistral_client.chat.completions.create(
 
146
  {"role": "user", "content": content}
147
  ],
148
  temperature=0.1,
149
+ max_tokens=1000,
150
+ response_format={ "type": "json_object" }
151
  )
152
 
153
  # Parse and validate response
 
155
  try:
156
  result = json.loads(response.choices[0].message.content)
157
 
158
+ # Validate response structure
159
+ required_fields = ["answer", "reference_sections", "summary", "confidence"]
160
+ if not all(field in result for field in required_fields):
161
+ raise ValueError("Missing required fields in response")
162
+
163
+ # Validate confidence level
164
+ if result["confidence"] not in ["HIGH", "MEDIUM", "LOW"]:
165
+ result["confidence"] = "LOW"
166
 
167
+ # Validate references against context
168
+ valid_references = [ref for ref in result["reference_sections"]
169
+ if ref in references]
170
+
171
+ # If references don't match, adjust confidence
172
+ if len(valid_references) != len(result["reference_sections"]):
173
+ result["reference_sections"] = valid_references
174
+ result["confidence"] = "LOW"
175
+
176
+ # Ensure answer and summary are strings
177
+ result["answer"] = str(result["answer"])
178
+ result["summary"] = str(result["summary"])
179
 
180
  return {
181
+ "answer": result["answer"],
182
  "references": valid_references,
183
+ "summary": result["summary"],
184
+ "confidence": result["confidence"]
185
  }
186
 
187
+ except json.JSONDecodeError as e:
188
+ logger.error(f"JSON parsing error: {str(e)}")
189
+ return {
190
+ "answer": "Error: Failed to parse response format",
191
+ "references": [],
192
+ "summary": "Response format error",
193
+ "confidence": "LOW"
194
+ }
195
+ except ValueError as e:
196
+ logger.error(f"Validation error: {str(e)}")
197
  return {
198
+ "answer": "Error: Invalid response structure",
199
  "references": [],
200
+ "summary": "Response validation error",
201
  "confidence": "LOW"
202
  }
203
 
204
  return {
205
+ "answer": "Error: No valid response received",
206
  "references": [],
207
+ "summary": "No response generated",
208
  "confidence": "LOW"
209
  }
210
 
 
262
 
263
  with gr.Row():
264
  with gr.Column():
265
+ references_output = gr.Textbox(label="Document References", lines=2)
266
  with gr.Column():
267
  summary_output = gr.Textbox(label="Summary", lines=2)
268
 
269
+ gr.Markdown("""
270
+ ### Important Notes:
271
+ - Responses are based ONLY on the provided document
272
+ - No external legal knowledge is used
273
+ - All references are from the document itself
274
+ - Confidence levels indicate how well the answer matches the document content
275
+ """)
276
+
277
  submit_btn.click(
278
  fn=process_query,
279
  inputs=[query_input],