veerukhannan committed on
Commit
e3f3bb1
·
verified ·
1 Parent(s): c9602aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -164
app.py CHANGED
@@ -1,194 +1,215 @@
1
  import gradio as gr
2
- from typing import List, Dict
3
- from langchain_huggingface import HuggingFacePipeline # Fixed import
4
- from langchain_core.prompts import ChatPromptTemplate
5
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
6
  import chromadb
7
- from chromadb.utils import embedding_functions
8
- import torch
9
  import os
 
 
 
 
10
 
11
class LegalChatbot:
    """Retrieval-augmented chatbot for the Bharatiya Nyaya Sanhita, 2023.

    Text chunks are stored in an in-memory ChromaDB collection, the closest
    chunks are retrieved for each question, and a local TinyLlama
    text-generation pipeline produces the answer from that context.
    """

    def __init__(self):
        """Create the ChromaDB collection, the generation pipeline and the prompt."""
        print("Initializing Legal Chatbot...")

        # Initialize ChromaDB
        self.chroma_client = chromadb.Client()

        # Initialize embedding function
        self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2",
            device="cpu"
        )

        # get_or_create (rather than create) so that constructing a second
        # instance in the same process does not fail on the existing collection.
        self.collection = self.chroma_client.get_or_create_collection(
            name="text_collection",
            embedding_function=self.embedding_function,
            metadata={"hnsw:space": "cosine"}
        )

        # Initialize the model - using a smaller model suitable for CPU
        pipe = pipeline(
            "text-generation",
            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.15,
            device="cpu"
        )
        self.llm = HuggingFacePipeline(pipeline=pipe)

        # Create prompt template
        self.template = """
IMPORTANT: You are a helpful assistant that provides information about the Bharatiya Nyaya Sanhita, 2023 based on the retrieved context.

STRICT RULES:
1. Base your response ONLY on the provided context
2. If you cannot find relevant information, respond with: "I apologize, but I cannot find information about that in the database."
3. Do not make assumptions or use external knowledge
4. Be concise and accurate in your responses
5. If quoting from the context, clearly indicate it

Context: {context}

Chat History: {chat_history}

Question: {question}

Answer:"""

        self.prompt = ChatPromptTemplate.from_template(self.template)
        # Kept for backward compatibility only. The conversation transcript is
        # now derived per call from the ``history`` argument of ``chat`` so that
        # separate sessions do not share (or endlessly grow) one global string.
        self.chat_history = ""
        self.initialized = False

    @staticmethod
    def _chunk_text(text: str, chunk_size: int = 512) -> List[str]:
        """Split *text* into consecutive pieces of at most *chunk_size* characters."""
        return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    @staticmethod
    def _format_history(history, max_turns: int = 3) -> str:
        """Render the most recent turns of a Gradio chat history as plain text.

        Accepts both history shapes Gradio may pass: a list of
        ``[user, assistant]`` pairs or a list of ``{"role", "content"}`` dicts.
        Only the last ``2 * max_turns`` transcript lines are kept so the prompt
        stays bounded.
        """
        if not history:
            return ""

        lines = []
        for item in history:
            if isinstance(item, dict):
                speaker = "User" if item.get("role") == "user" else "AI"
                lines.append(f"{speaker}: {item.get('content', '')}")
            elif isinstance(item, (list, tuple)):
                user_msg, ai_msg = (list(item) + [None, None])[:2]
                if user_msg is not None:
                    lines.append(f"User: {user_msg}")
                if ai_msg is not None:
                    lines.append(f"AI: {ai_msg}")
        return "\n".join(lines[-2 * max_turns:])

    def _initialize_database(self) -> bool:
        """Load the act text into the collection; return True on success.

        Reads ``a2023-45.txt`` and ``index.txt`` from the working directory.
        Any failure is printed and reported as ``False`` rather than raised.
        """
        try:
            if self.initialized:
                return True

            print("Loading documents into database...")

            # Read the main text file
            with open('a2023-45.txt', 'r', encoding='utf-8') as f:
                text_content = f.read()

            # Read the index file
            with open('index.txt', 'r', encoding='utf-8') as f:
                index_lines = f.readlines()

            chunks = self._chunk_text(text_content, 512)

            # Add documents in batches
            batch_size = 50
            for start in range(0, len(chunks), batch_size):
                batch = chunks[start:start + batch_size]
                positions = range(start, start + len(batch))
                batch_ids = [f"doc_{j}" for j in positions]
                # NOTE(review): chunk j is labelled with line j of index.txt;
                # whether index lines really align with 512-character chunks
                # cannot be confirmed from this file.
                batch_metadata = [{
                    "index": index_lines[j].strip() if j < len(index_lines) else f"Chunk {j+1}",
                    "chunk_number": j
                } for j in positions]

                # upsert keeps a retry after a partial failure idempotent:
                # ids that were already stored are overwritten, not duplicated.
                self.collection.upsert(
                    documents=batch,
                    ids=batch_ids,
                    metadatas=batch_metadata
                )

            self.initialized = True
            return True

        except Exception as e:
            print(f"Error initializing database: {str(e)}")
            return False

    def _search_database(self, query: str) -> List[Dict]:
        """Return up to three matching chunks as dicts with content, metadata and score.

        ``score`` is ``1 - distance`` as reported by the collection query.
        Errors are printed and yield an empty list.
        """
        try:
            results = self.collection.query(
                query_texts=[query],
                n_results=3,
                include=["documents", "metadatas", "distances"]
            )

            return [
                {
                    "content": doc,
                    "metadata": meta,
                    "score": 1 - dist
                }
                for doc, meta, dist in zip(
                    results['documents'][0],
                    results['metadatas'][0],
                    results['distances'][0]
                )
            ]
        except Exception as e:
            print(f"Error searching database: {str(e)}")
            return []

    def chat(self, query: str, history) -> str:
        """Answer *query* using retrieved context and the session's *history*.

        Args:
            query: The user's question.
            history: The per-session conversation supplied by Gradio.

        Returns:
            The generated answer, or a human-readable error/apology string.
            This method does not raise; failures are reported in the string.
        """
        try:
            # Initialize database if needed
            if not self.initialized and not self._initialize_database():
                return "Error: Unable to initialize the database. Please try again."

            # Search for relevant content
            search_results = self._search_database(query)

            if not search_results:
                return "I apologize, but I cannot find information about that in the database."

            # Extract and combine relevant content
            context = "\n\n".join(
                f"[Section {r['metadata']['index']}]\n{r['content']}"
                for r in search_results
            )

            # Generate response using LLM
            chain = self.prompt | self.llm
            return chain.invoke({
                "context": context,
                "chat_history": self._format_history(history),
                "question": query
            })

        except Exception as e:
            return f"Error processing query: {str(e)}"
 
 
 
 
 
 
 
 
171
 
172
# Build the chatbot once at import time; the interface below reuses it.
chatbot = LegalChatbot()

# Chat UI wired to the chatbot's `chat` method.
iface = gr.ChatInterface(
    fn=chatbot.chat,
    theme=gr.themes.Soft(),
    title="Bharatiya Nyaya Sanhita, 2023 - Legal Assistant",
    description="Ask questions about the Bharatiya Nyaya Sanhita, 2023. The system will initialize on your first query.",
    examples=[
        "What is criminal conspiracy?",
        "What are the punishments for corruption?",
        "Explain the concept of culpable homicide",
        "What constitutes theft under the act?",
    ],
)

# Only start the server when executed as a script.
if __name__ == "__main__":
    iface.launch(show_error=True, share=False)
 
1
  import gradio as gr
 
 
 
 
2
  import chromadb
 
 
3
  import os
4
+ from openai import OpenAI
5
+ import json
6
+ from typing import List, Dict
7
+ import re
8
 
9
class LegalAssistant:
    """Answer Indian-law questions with ChromaDB retrieval and a Mistral chat model.

    A query is validated, matching documents are fetched from the
    ``legal_documents`` collection, and the model is asked to reply in a fixed
    JSON shape that is then normalised into a plain dict.
    """

    def __init__(self):
        """Set up the ChromaDB collection, the Mistral client and the system prompt.

        Raises:
            RuntimeError: If the ``MISTRAL_API_KEY`` environment variable is unset.
        """
        # Initialize ChromaDB
        self.chroma_client = chromadb.Client()
        self.collection = self.chroma_client.get_or_create_collection("legal_documents")

        # SECURITY: the key must come from the environment only. A credential
        # embedded in source as a fallback is exposed to everyone who can read
        # the repository, so there is deliberately no default here.
        api_key = os.environ.get("MISTRAL_API_KEY")
        if not api_key:
            raise RuntimeError("MISTRAL_API_KEY environment variable is not set.")

        # Initialize Mistral AI client (OpenAI-compatible endpoint)
        self.mistral_client = OpenAI(
            api_key=api_key,
            base_url="https://api.mistral.ai/v1"
        )

        # Define system prompt with strict rules
        self.system_prompt = """You are a specialized legal assistant trained on Indian law. You MUST follow these strict rules:

RESPONSE FORMAT RULES:
1. ALWAYS structure your response in this exact JSON format:
{
    "answer": "Your detailed answer here",
    "reference_sections": ["Section X of Act Y", ...],
    "summary": "2-3 line summary",
    "confidence": "HIGH/MEDIUM/LOW"
}

CONTENT RULES:
1. NEVER make assumptions or provide information not supported by Indian law
2. ALWAYS cite specific sections, acts, and legal precedents
3. If information is insufficient, explicitly state "Insufficient information" in answer
4. NEVER provide legal advice, only legal information
5. For any constitutional matters, ALWAYS cite relevant Articles

ACCURACY RULES:
1. If confidence is less than 80%, mark as LOW confidence
2. If multiple interpretations exist, list ALL with citations
3. If law has been amended, specify the latest amendment date
4. For case law, cite the full case reference

PROHIBITED:
1. NO personal opinions
2. NO hypothetical scenarios
3. NO interpretation of ongoing cases
4. NO advice on specific legal situations

ERROR HANDLING:
1. If query is unclear: Request clarification
2. If outside Indian law scope: State "Outside scope of Indian law"
3. If conflicting laws exist: List all applicable laws"""

    @staticmethod
    def _failure(answer: str, summary: str) -> Dict:
        """Build the LOW-confidence response dict used for every error path."""
        return {
            "answer": answer,
            "references": [],
            "summary": summary,
            "confidence": "LOW"
        }

    @staticmethod
    def _extract_json_object(raw):
        """Return the JSON object embedded in *raw*, or ``None`` if there is none.

        Chat models frequently wrap JSON in markdown fences or add surrounding
        prose, so only the span from the first ``{`` to the last ``}`` is parsed.
        """
        if not isinstance(raw, str):
            return None
        start = raw.find("{")
        end = raw.rfind("}")
        if start == -1 or end <= start:
            return None
        try:
            parsed = json.loads(raw[start:end + 1])
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None

    def validate_query(self, query: str) -> tuple[bool, str]:
        """Check that *query* is 10-500 characters and ends with ``?`` or ``.``.

        Surrounding whitespace is ignored for every check, so a trailing space
        or newline does not cause a rejection.

        Returns:
            ``(True, "")`` when valid, otherwise ``(False, reason)``.
        """
        text = query.strip() if query else ""
        if len(text) < 10:
            return False, "Query too short. Please provide more details."
        if len(text) > 500:
            return False, "Query too long. Please be more concise."
        if not text.endswith(("?", ".")):
            return False, "Query must end with a question mark or period."
        return True, ""

    def _search_documents(self, query: str) -> tuple[str, List[str]]:
        """Search ChromaDB and return ``(joined_documents, sources)``.

        Returns ``("", [])`` when nothing matches or the query fails. A result
        whose metadata is missing is reported with the source ``"Unknown"``
        instead of discarding the whole result set.
        """
        try:
            results = self.collection.query(
                query_texts=[query],
                n_results=3
            )
        except Exception as e:
            print(f"Search error: {str(e)}")
            return "", []

        document_groups = (results or {}).get("documents") or []
        if not document_groups or not document_groups[0]:
            return "", []

        documents = document_groups[0]
        metadata_groups = results.get("metadatas") or [[]]
        # Individual metadata entries may be None; values are coerced to str
        # because the sources are later joined into the prompt.
        sources = [str((m or {}).get("source", "Unknown")) for m in metadata_groups[0]]
        return "\n\n".join(documents), sources

    def get_response(self, query: str) -> Dict:
        """Get a structured answer from Mistral AI with context from ChromaDB.

        Returns:
            A dict with the keys ``answer``, ``references`` (list of str),
            ``summary`` and ``confidence``. Errors are reported inside this
            dict (with ``confidence`` ``"LOW"``) rather than raised.
        """
        # Validate query
        is_valid, error_message = self.validate_query(query)
        if not is_valid:
            return self._failure(error_message, "Invalid query")

        try:
            query = query.strip()

            # Get relevant context from ChromaDB
            context, sources = self._search_documents(query)

            # Prepare content; fall back to the bare question without context
            if context:
                content = (
                    f"Context: {context}\n"
                    f"Sources: {', '.join(sources)}\n"
                    f"Question: {query}"
                )
            else:
                content = query

            # Get response from Mistral AI
            response = self.mistral_client.chat.completions.create(
                model="mistral-medium",
                messages=[
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": content},
                ],
                temperature=0.1,
                max_tokens=1000
            )

            if not response.choices:
                return self._failure("No response received", "Response generation failed")

            # Parse response
            result = self._extract_json_object(response.choices[0].message.content)
            if result is None:
                return self._failure("Error: Response format invalid", "Response parsing failed")

            # The model may return a single string or something else entirely;
            # callers join this value, so always hand back a list of strings.
            references = result.get("reference_sections", [])
            if isinstance(references, str):
                references = [references]
            elif not isinstance(references, list):
                references = []

            return {
                "answer": result.get("answer", "No answer provided"),
                "references": [str(ref) for ref in references],
                "summary": result.get("summary", ""),
                "confidence": result.get("confidence", "LOW")
            }

        except Exception as e:
            # Boundary for the UI: surface the failure as a normal response.
            return self._failure(f"Error: {str(e)}", "System error occurred")
155
+
156
# Initialize the assistant once at import time so a single instance is shared.
assistant = LegalAssistant()
158
 
159
# Gradio click handler: adapts the assistant's response dict to the UI outputs
def process_query(query: str) -> tuple:
    """Run *query* through the assistant and return values for the four output boxes.

    Returns:
        ``(answer, references, summary, confidence)`` where ``references`` is a
        comma-separated string. Placeholder text is substituted when the
        references or the summary are empty.
    """
    response = assistant.get_response(query)

    # The model output is not guaranteed to be a list of strings: a bare string
    # would be joined character by character and non-string items would raise
    # TypeError, so normalise before joining.
    references = response.get("references") or []
    if isinstance(references, str) or not isinstance(references, (list, tuple)):
        references = [references]
    references_text = ", ".join(str(ref) for ref in references)

    return (
        response["answer"],
        references_text if references_text else "No specific references",
        response["summary"] if response["summary"] else "No summary available",
        response["confidence"]
    )
168
 
169
# Page layout: guidelines, query box, submit button, then the four result fields.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Indian Legal Assistant
    ## Guidelines for Queries:
    1. Be specific and clear in your questions
    2. End questions with a question mark
    3. Provide relevant context if available
    4. Keep queries between 10-500 characters
    """)

    # Input row
    with gr.Row():
        query_input = gr.Textbox(
            placeholder="e.g., What is the legal age for marriage in India as per current laws?",
            label="Enter your legal query",
        )

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")

    # Result rows
    with gr.Row():
        confidence_output = gr.Textbox(label="Confidence Level")

    with gr.Row():
        answer_output = gr.Textbox(lines=5, label="Answer")

    with gr.Row():
        with gr.Column():
            references_output = gr.Textbox(lines=3, label="Legal References")
        with gr.Column():
            summary_output = gr.Textbox(lines=2, label="Summary")

    gr.Markdown("""
    ### Important Notes:
    - This assistant provides legal information, not legal advice
    - Always verify information with a qualified legal professional
    - Information is based on Indian law only
    """)

    # Output order must match the tuple returned by process_query.
    submit_btn.click(
        fn=process_query,
        inputs=[query_input],
        outputs=[answer_output, references_output, summary_output, confidence_output],
    )

# Start the web app
demo.launch()