Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ from openai import OpenAI
|
|
5 |
import json
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from loguru import logger
|
8 |
-
from test_embeddings import test_chromadb_content
|
9 |
|
10 |
class SentenceTransformerEmbeddings:
|
11 |
def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
|
@@ -20,7 +20,6 @@ class LegalAssistant:
|
|
20 |
try:
|
21 |
# Initialize and verify ChromaDB content
|
22 |
logger.info("Initializing LegalAssistant...")
|
23 |
-
from test_embeddings import test_chromadb_content, initialize_chromadb
|
24 |
|
25 |
# Try to verify content, if fails, try to initialize
|
26 |
if not test_chromadb_content():
|
@@ -96,43 +95,48 @@ class LegalAssistant:
|
|
96 |
|
97 |
for doc, meta in zip(results['documents'][0], results['metadatas'][0]):
|
98 |
context_parts.append(f"{meta['title']}:\n{doc}")
|
99 |
-
references.append(
|
100 |
|
101 |
context = "\n\n".join(context_parts)
|
102 |
|
103 |
-
# Prepare
|
104 |
-
system_prompt =
|
105 |
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
"reference_sections": ["Exact section titles from the context"],
|
114 |
-
"summary": "2-3 line summary using ONLY information from context",
|
115 |
-
"confidence": "HIGH/MEDIUM/LOW based on context match"
|
116 |
-
}
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
|
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
Context Sections:
|
128 |
{context}
|
129 |
|
130 |
-
Available Document Sections:
|
131 |
-
{', '.join(references)}
|
132 |
-
|
133 |
Question: {query}
|
134 |
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
# Get response from Mistral AI
|
138 |
response = self.mistral_client.chat.completions.create(
|
@@ -142,7 +146,8 @@ Remember: ONLY use information from the above context."""
|
|
142 |
{"role": "user", "content": content}
|
143 |
],
|
144 |
temperature=0.1,
|
145 |
-
max_tokens=1000
|
|
|
146 |
)
|
147 |
|
148 |
# Parse and validate response
|
@@ -150,39 +155,56 @@ Remember: ONLY use information from the above context."""
|
|
150 |
try:
|
151 |
result = json.loads(response.choices[0].message.content)
|
152 |
|
153 |
-
# Validate
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
|
|
|
|
|
|
|
|
165 |
|
166 |
return {
|
167 |
-
"answer": result
|
168 |
"references": valid_references,
|
169 |
-
"summary": result
|
170 |
-
"confidence": result
|
171 |
}
|
172 |
|
173 |
-
except json.JSONDecodeError:
|
174 |
-
logger.error("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
return {
|
176 |
-
"answer": "Error: Invalid response
|
177 |
"references": [],
|
178 |
-
"summary": "Response
|
179 |
"confidence": "LOW"
|
180 |
}
|
181 |
|
182 |
return {
|
183 |
-
"answer": "No valid response received",
|
184 |
"references": [],
|
185 |
-
"summary": "
|
186 |
"confidence": "LOW"
|
187 |
}
|
188 |
|
@@ -240,10 +262,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
240 |
|
241 |
with gr.Row():
|
242 |
with gr.Column():
|
243 |
-
references_output = gr.Textbox(label="Document References", lines=
|
244 |
with gr.Column():
|
245 |
summary_output = gr.Textbox(label="Summary", lines=2)
|
246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
247 |
submit_btn.click(
|
248 |
fn=process_query,
|
249 |
inputs=[query_input],
|
|
|
5 |
import json
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from loguru import logger
|
8 |
+
from test_embeddings import test_chromadb_content, initialize_chromadb
|
9 |
|
10 |
class SentenceTransformerEmbeddings:
|
11 |
def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
|
|
|
20 |
try:
|
21 |
# Initialize and verify ChromaDB content
|
22 |
logger.info("Initializing LegalAssistant...")
|
|
|
23 |
|
24 |
# Try to verify content, if fails, try to initialize
|
25 |
if not test_chromadb_content():
|
|
|
95 |
|
96 |
for doc, meta in zip(results['documents'][0], results['metadatas'][0]):
|
97 |
context_parts.append(f"{meta['title']}:\n{doc}")
|
98 |
+
references.append(meta['title'])
|
99 |
|
100 |
context = "\n\n".join(context_parts)
|
101 |
|
102 |
+
# Prepare system prompt with explicit JSON format
|
103 |
+
system_prompt = '''You are a specialized legal assistant that MUST follow these STRICT rules:
|
104 |
|
105 |
+
1. You MUST ONLY use information from the provided context.
|
106 |
+
2. DO NOT use any external knowledge about laws, IPC, Constitution, or legal matters.
|
107 |
+
3. Your response MUST be in this EXACT JSON format:
|
108 |
+
{
|
109 |
+
"answer": "Your detailed answer using ONLY information from the context",
|
110 |
+
"reference_sections": ["List of section titles used from context"],
|
111 |
+
"summary": "Brief 2-3 line summary",
|
112 |
+
"confidence": "HIGH/MEDIUM/LOW"
|
113 |
+
}
|
114 |
|
115 |
+
Confidence Level Rules:
|
116 |
+
- HIGH: When exact information is found in context
|
117 |
+
- MEDIUM: When partial or indirect information is found
|
118 |
+
- LOW: When information is unclear or not found
|
|
|
|
|
|
|
|
|
119 |
|
120 |
+
If information is not in context, respond with:
|
121 |
+
{
|
122 |
+
"answer": "This information is not present in the provided document.",
|
123 |
+
"reference_sections": [],
|
124 |
+
"summary": "Information not found in document",
|
125 |
+
"confidence": "LOW"
|
126 |
+
}'''
|
127 |
|
128 |
+
# Prepare user content
|
129 |
+
content = f'''Context Sections:
|
|
|
130 |
{context}
|
131 |
|
|
|
|
|
|
|
132 |
Question: {query}
|
133 |
|
134 |
+
IMPORTANT:
|
135 |
+
1. Use ONLY the information from the above context
|
136 |
+
2. Format your response as a valid JSON object with the exact structure shown above
|
137 |
+
3. Include ONLY section titles that exist in the context
|
138 |
+
4. DO NOT add any text outside the JSON structure
|
139 |
+
5. Ensure the JSON is properly formatted with double quotes'''
|
140 |
|
141 |
# Get response from Mistral AI
|
142 |
response = self.mistral_client.chat.completions.create(
|
|
|
146 |
{"role": "user", "content": content}
|
147 |
],
|
148 |
temperature=0.1,
|
149 |
+
max_tokens=1000,
|
150 |
+
response_format={ "type": "json_object" }
|
151 |
)
|
152 |
|
153 |
# Parse and validate response
|
|
|
155 |
try:
|
156 |
result = json.loads(response.choices[0].message.content)
|
157 |
|
158 |
+
# Validate response structure
|
159 |
+
required_fields = ["answer", "reference_sections", "summary", "confidence"]
|
160 |
+
if not all(field in result for field in required_fields):
|
161 |
+
raise ValueError("Missing required fields in response")
|
162 |
+
|
163 |
+
# Validate confidence level
|
164 |
+
if result["confidence"] not in ["HIGH", "MEDIUM", "LOW"]:
|
165 |
+
result["confidence"] = "LOW"
|
166 |
|
167 |
+
# Validate references against context
|
168 |
+
valid_references = [ref for ref in result["reference_sections"]
|
169 |
+
if ref in references]
|
170 |
+
|
171 |
+
# If references don't match, adjust confidence
|
172 |
+
if len(valid_references) != len(result["reference_sections"]):
|
173 |
+
result["reference_sections"] = valid_references
|
174 |
+
result["confidence"] = "LOW"
|
175 |
+
|
176 |
+
# Ensure answer and summary are strings
|
177 |
+
result["answer"] = str(result["answer"])
|
178 |
+
result["summary"] = str(result["summary"])
|
179 |
|
180 |
return {
|
181 |
+
"answer": result["answer"],
|
182 |
"references": valid_references,
|
183 |
+
"summary": result["summary"],
|
184 |
+
"confidence": result["confidence"]
|
185 |
}
|
186 |
|
187 |
+
except json.JSONDecodeError as e:
|
188 |
+
logger.error(f"JSON parsing error: {str(e)}")
|
189 |
+
return {
|
190 |
+
"answer": "Error: Failed to parse response format",
|
191 |
+
"references": [],
|
192 |
+
"summary": "Response format error",
|
193 |
+
"confidence": "LOW"
|
194 |
+
}
|
195 |
+
except ValueError as e:
|
196 |
+
logger.error(f"Validation error: {str(e)}")
|
197 |
return {
|
198 |
+
"answer": "Error: Invalid response structure",
|
199 |
"references": [],
|
200 |
+
"summary": "Response validation error",
|
201 |
"confidence": "LOW"
|
202 |
}
|
203 |
|
204 |
return {
|
205 |
+
"answer": "Error: No valid response received",
|
206 |
"references": [],
|
207 |
+
"summary": "No response generated",
|
208 |
"confidence": "LOW"
|
209 |
}
|
210 |
|
|
|
262 |
|
263 |
with gr.Row():
|
264 |
with gr.Column():
|
265 |
+
references_output = gr.Textbox(label="Document References", lines=2)
|
266 |
with gr.Column():
|
267 |
summary_output = gr.Textbox(label="Summary", lines=2)
|
268 |
|
269 |
+
gr.Markdown("""
|
270 |
+
### Important Notes:
|
271 |
+
- Responses are based ONLY on the provided document
|
272 |
+
- No external legal knowledge is used
|
273 |
+
- All references are from the document itself
|
274 |
+
- Confidence levels indicate how well the answer matches the document content
|
275 |
+
""")
|
276 |
+
|
277 |
submit_btn.click(
|
278 |
fn=process_query,
|
279 |
inputs=[query_input],
|