Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,28 +7,25 @@ import os
|
|
7 |
from astrapy.db import AstraDB
|
8 |
from dotenv import load_dotenv
|
9 |
from huggingface_hub import login
|
10 |
-
|
11 |
-
import
|
|
|
|
|
12 |
|
13 |
# Load environment variables
|
14 |
load_dotenv()
|
15 |
-
|
16 |
-
# Login to Hugging Face Hub
|
17 |
login(token=os.getenv("HUGGINGFACE_API_TOKEN"))
|
18 |
|
|
|
|
|
|
|
19 |
class LegalTextSearchBot:
|
20 |
def __init__(self):
|
21 |
-
# Initialize AstraDB connection
|
22 |
self.astra_db = AstraDB(
|
23 |
token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
|
24 |
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT")
|
25 |
)
|
26 |
-
|
27 |
-
# Set your collection
|
28 |
-
self.collection = self.astra_db.collection(os.getenv("ASTRA_DB_COLLECTION"))
|
29 |
-
|
30 |
-
# Initialize the models
|
31 |
-
self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
32 |
|
33 |
pipe = pipeline(
|
34 |
"text-generation",
|
@@ -40,7 +37,6 @@ class LegalTextSearchBot:
|
|
40 |
)
|
41 |
self.llm = HuggingFacePipeline(pipeline=pipe)
|
42 |
|
43 |
-
# Create prompt template
|
44 |
self.template = """
|
45 |
IMPORTANT: You are a legal assistant that provides accurate information based on the Indian legal sections provided in the context.
|
46 |
|
@@ -62,22 +58,21 @@ class LegalTextSearchBot:
|
|
62 |
|
63 |
self.prompt = ChatPromptTemplate.from_template(self.template)
|
64 |
self.chat_history = ""
|
|
|
65 |
|
66 |
def _search_astra(self, query: str) -> List[Dict]:
|
67 |
-
|
68 |
-
|
69 |
-
# Generate embedding for the query
|
70 |
-
query_embedding = self.embedding_model.encode(query).tolist()
|
71 |
|
72 |
-
|
73 |
results = list(self.collection.vector_find(
|
74 |
-
|
75 |
limit=5,
|
76 |
-
fields=["section_number", "title", "
|
|
|
77 |
))
|
78 |
|
79 |
-
if not results:
|
80 |
-
# If no results, try a more general search
|
81 |
results = list(self.collection.find(
|
82 |
{},
|
83 |
limit=5
|
@@ -86,55 +81,59 @@ class LegalTextSearchBot:
|
|
86 |
return results
|
87 |
|
88 |
except Exception as e:
|
89 |
-
|
90 |
-
|
|
|
91 |
|
92 |
def format_section(self, section: Dict) -> str:
|
93 |
-
|
94 |
-
|
95 |
-
chapter_info = section.get('chapter_info', {})
|
96 |
-
chapter_title = chapter_info.get('title', 'N/A') if isinstance(chapter_info, dict) else 'N/A'
|
97 |
|
|
|
98 |
return f"""
|
|
|
|
|
99 |
Section {section.get('section_number', 'N/A')}: {section.get('title', 'N/A')}
|
100 |
-
|
101 |
|
102 |
Content:
|
103 |
{section.get('content', 'N/A')}
|
104 |
|
105 |
-
{
|
|
|
106 |
"""
|
107 |
except Exception as e:
|
108 |
print(f"Error formatting section: {str(e)}")
|
109 |
return str(section)
|
110 |
|
111 |
-
def search_sections(self, query: str) -> Tuple[str, str]:
|
112 |
-
|
|
|
113 |
try:
|
114 |
-
|
115 |
search_results = self._search_astra(query)
|
116 |
|
117 |
if not search_results:
|
118 |
return "No relevant sections found.", "I apologize, but I cannot find relevant sections in the database."
|
119 |
|
120 |
-
|
121 |
raw_results = []
|
122 |
context_parts = []
|
123 |
|
124 |
-
for result in search_results:
|
125 |
-
|
|
|
|
|
126 |
raw_results.append(self.format_section(result))
|
127 |
-
|
128 |
-
# Add to context for AI
|
129 |
context_parts.append(f"""
|
130 |
Section {result.get('section_number')}: {result.get('title')}
|
131 |
{result.get('content', '')}
|
132 |
""")
|
|
|
133 |
|
134 |
-
|
135 |
context = "\n\n".join(context_parts)
|
136 |
|
137 |
-
# Generate AI interpretation
|
138 |
chain = self.prompt | self.llm
|
139 |
ai_response = chain.invoke({
|
140 |
"context": context,
|
@@ -144,30 +143,33 @@ Section {result.get('section_number')}: {result.get('title')}
|
|
144 |
|
145 |
self.chat_history += f"\nUser: {query}\nAI: {ai_response}\n"
|
146 |
|
|
|
147 |
return "\n".join(raw_results), ai_response
|
148 |
|
|
|
|
|
149 |
except Exception as e:
|
150 |
error_msg = f"Error processing query: {str(e)}"
|
151 |
print(error_msg)
|
152 |
return error_msg, "An error occurred while processing your query."
|
153 |
|
|
|
|
|
|
|
154 |
def create_interface():
|
155 |
-
""
|
156 |
-
with gr.Blocks(title="Legal Text Search System", theme=gr.themes.Soft()) as iface:
|
157 |
gr.Markdown("""
|
158 |
-
# π Legal
|
159 |
|
160 |
-
|
161 |
-
1. π
|
162 |
-
2. π€ AI-powered interpretation of the
|
163 |
|
164 |
Enter your legal query below:
|
165 |
""")
|
166 |
|
167 |
-
# Initialize the search bot
|
168 |
search_bot = LegalTextSearchBot()
|
169 |
|
170 |
-
# Create input and output components
|
171 |
with gr.Row():
|
172 |
query_input = gr.Textbox(
|
173 |
label="Your Query",
|
@@ -176,57 +178,65 @@ def create_interface():
|
|
176 |
)
|
177 |
|
178 |
with gr.Row():
|
179 |
-
|
|
|
|
|
|
|
180 |
|
181 |
with gr.Row():
|
182 |
with gr.Column():
|
183 |
-
raw_output = gr.
|
184 |
-
label="π Relevant Legal Sections"
|
185 |
-
lines=15,
|
186 |
-
max_lines=30
|
187 |
)
|
188 |
with gr.Column():
|
189 |
-
ai_output = gr.
|
190 |
-
label="π€ AI Interpretation"
|
191 |
-
lines=15,
|
192 |
-
max_lines=30
|
193 |
)
|
194 |
|
195 |
-
# Add example queries
|
196 |
gr.Examples(
|
197 |
examples=[
|
198 |
"What are the penalties for public servants who conceal information?",
|
199 |
-
"What
|
200 |
-
"
|
201 |
-
"What are the
|
202 |
-
"
|
203 |
],
|
204 |
inputs=query_input,
|
205 |
label="Example Queries"
|
206 |
)
|
207 |
|
208 |
-
# Set up the search function
|
209 |
def search(query):
|
210 |
-
|
211 |
-
|
|
|
|
|
|
|
212 |
|
213 |
-
# Connect the button to the search function
|
214 |
search_button.click(
|
215 |
fn=search,
|
216 |
inputs=query_input,
|
217 |
-
outputs=[raw_output, ai_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
)
|
219 |
|
220 |
-
# Also allow pressing Enter to search
|
221 |
query_input.submit(
|
222 |
fn=search,
|
223 |
inputs=query_input,
|
224 |
-
outputs=[raw_output, ai_output]
|
|
|
225 |
)
|
226 |
|
227 |
return iface
|
228 |
|
229 |
-
# Create and launch the interface
|
230 |
if __name__ == "__main__":
|
231 |
demo = create_interface()
|
232 |
-
demo.launch()
|
|
|
|
|
|
|
|
7 |
from astrapy.db import AstraDB
|
8 |
from dotenv import load_dotenv
|
9 |
from huggingface_hub import login
|
10 |
+
import time
|
11 |
+
import threading
|
12 |
+
from queue import Queue
|
13 |
+
import asyncio
|
14 |
|
15 |
# Load environment variables
|
16 |
load_dotenv()
|
|
|
|
|
17 |
login(token=os.getenv("HUGGINGFACE_API_TOKEN"))
|
18 |
|
19 |
+
class SearchCancelled(Exception):
    """Signal that the user asked for the in-progress search to stop.

    Carries no extra state beyond the message passed to ``Exception``.
    """
|
21 |
+
|
22 |
class LegalTextSearchBot:
|
23 |
def __init__(self):
|
|
|
24 |
self.astra_db = AstraDB(
|
25 |
token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
|
26 |
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT")
|
27 |
)
|
28 |
+
self.collection = self.astra_db.collection("legal_content")
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
pipe = pipeline(
|
31 |
"text-generation",
|
|
|
37 |
)
|
38 |
self.llm = HuggingFacePipeline(pipeline=pipe)
|
39 |
|
|
|
40 |
self.template = """
|
41 |
IMPORTANT: You are a legal assistant that provides accurate information based on the Indian legal sections provided in the context.
|
42 |
|
|
|
58 |
|
59 |
self.prompt = ChatPromptTemplate.from_template(self.template)
|
60 |
self.chat_history = ""
|
61 |
+
self.cancel_search = False
|
62 |
|
63 |
def _search_astra(self, query: str) -> List[Dict]:
|
64 |
+
if self.cancel_search:
|
65 |
+
raise SearchCancelled("Search was cancelled by user")
|
|
|
|
|
66 |
|
67 |
+
try:
|
68 |
results = list(self.collection.vector_find(
|
69 |
+
query,
|
70 |
limit=5,
|
71 |
+
fields=["section_number", "title", "chapter_number", "chapter_title",
|
72 |
+
"content", "type", "metadata"]
|
73 |
))
|
74 |
|
75 |
+
if not results and not self.cancel_search:
|
|
|
76 |
results = list(self.collection.find(
|
77 |
{},
|
78 |
limit=5
|
|
|
81 |
return results
|
82 |
|
83 |
except Exception as e:
|
84 |
+
if not isinstance(e, SearchCancelled):
|
85 |
+
print(f"Error searching AstraDB: {str(e)}")
|
86 |
+
raise
|
87 |
|
88 |
def format_section(self, section: Dict) -> str:
    """Render one search hit as a delimited, human-readable text block.

    Reads the ``chapter_number``, ``chapter_title``, ``section_number``,
    ``title``, ``type``, ``content`` and ``metadata`` keys of *section*;
    missing keys fall back to ``'N/A'`` (``'section'`` for ``type``).

    Args:
        section: A document returned by the collection search.

    Returns:
        The formatted block, or ``str(section)`` if formatting fails.

    Raises:
        SearchCancelled: If ``self.cancel_search`` is set when called.
    """
    if self.cancel_search:
        raise SearchCancelled("Search was cancelled by user")

    try:
        # ``metadata`` may be missing, stored as null, or not be a mapping;
        # none of those should discard the rest of the formatted section.
        metadata = section.get('metadata')
        references = metadata.get('references') if isinstance(metadata, dict) else None
        if isinstance(references, str):
            # A bare string would otherwise be joined character by character.
            references = [references]
        # str() each entry so non-string references do not break the join.
        references_text = ', '.join(str(ref) for ref in references or []) or 'None'

        rule = '=' * 80
        return f"""
{rule}
Chapter {section.get('chapter_number', 'N/A')}: {section.get('chapter_title', 'N/A')}
Section {section.get('section_number', 'N/A')}: {section.get('title', 'N/A')}
Type: {section.get('type', 'section')}

Content:
{section.get('content', 'N/A')}

References: {references_text}
{rule}
"""
    except Exception as e:
        # Best-effort fallback: show the raw document rather than fail the search.
        print(f"Error formatting section: {str(e)}")
        return str(section)
|
108 |
|
109 |
+
def search_sections(self, query: str, progress=gr.Progress()) -> Tuple[str, str]:
|
110 |
+
self.cancel_search = False
|
111 |
+
|
112 |
try:
|
113 |
+
progress(0, desc="Searching relevant sections...")
|
114 |
search_results = self._search_astra(query)
|
115 |
|
116 |
if not search_results:
|
117 |
return "No relevant sections found.", "I apologize, but I cannot find relevant sections in the database."
|
118 |
|
119 |
+
progress(0.3, desc="Processing results...")
|
120 |
raw_results = []
|
121 |
context_parts = []
|
122 |
|
123 |
+
for idx, result in enumerate(search_results):
|
124 |
+
if self.cancel_search:
|
125 |
+
raise SearchCancelled("Search was cancelled by user")
|
126 |
+
|
127 |
raw_results.append(self.format_section(result))
|
|
|
|
|
128 |
context_parts.append(f"""
|
129 |
Section {result.get('section_number')}: {result.get('title')}
|
130 |
{result.get('content', '')}
|
131 |
""")
|
132 |
+
progress((0.3 + (idx * 0.1)), desc="Processing results...")
|
133 |
|
134 |
+
progress(0.8, desc="Generating AI interpretation...")
|
135 |
context = "\n\n".join(context_parts)
|
136 |
|
|
|
137 |
chain = self.prompt | self.llm
|
138 |
ai_response = chain.invoke({
|
139 |
"context": context,
|
|
|
143 |
|
144 |
self.chat_history += f"\nUser: {query}\nAI: {ai_response}\n"
|
145 |
|
146 |
+
progress(1.0, desc="Complete!")
|
147 |
return "\n".join(raw_results), ai_response
|
148 |
|
149 |
+
except SearchCancelled:
|
150 |
+
return "Search cancelled by user.", "Search was stopped. Please try again with a new query."
|
151 |
except Exception as e:
|
152 |
error_msg = f"Error processing query: {str(e)}"
|
153 |
print(error_msg)
|
154 |
return error_msg, "An error occurred while processing your query."
|
155 |
|
156 |
+
def cancel(self):
    """Ask the in-progress search to stop.

    Only raises the ``cancel_search`` flag; the search methods of this
    class check that flag and raise ``SearchCancelled`` when it is set.
    """
    self.cancel_search = True
|
158 |
+
|
159 |
def create_interface():
|
160 |
+
with gr.Blocks(title="Bharatiya Nyaya Sanhita Search", theme=gr.themes.Soft()) as iface:
|
|
|
161 |
gr.Markdown("""
|
162 |
+
# π Bharatiya Nyaya Sanhita Legal Search System
|
163 |
|
164 |
+
Search through the Bharatiya Nyaya Sanhita, 2023 and get:
|
165 |
+
1. π Relevant sections, explanations, and illustrations
|
166 |
+
2. π€ AI-powered interpretation of the legal content
|
167 |
|
168 |
Enter your legal query below:
|
169 |
""")
|
170 |
|
|
|
171 |
search_bot = LegalTextSearchBot()
|
172 |
|
|
|
173 |
with gr.Row():
|
174 |
query_input = gr.Textbox(
|
175 |
label="Your Query",
|
|
|
178 |
)
|
179 |
|
180 |
with gr.Row():
|
181 |
+
with gr.Column(scale=4):
|
182 |
+
search_button = gr.Button("π Search Legal Sections", variant="primary")
|
183 |
+
with gr.Column(scale=1):
|
184 |
+
stop_button = gr.Button("π Stop Search", variant="stop")
|
185 |
|
186 |
with gr.Row():
|
187 |
with gr.Column():
|
188 |
+
raw_output = gr.Markdown(
|
189 |
+
label="π Relevant Legal Sections"
|
|
|
|
|
190 |
)
|
191 |
with gr.Column():
|
192 |
+
ai_output = gr.Markdown(
|
193 |
+
label="π€ AI Interpretation"
|
|
|
|
|
194 |
)
|
195 |
|
|
|
196 |
gr.Examples(
|
197 |
examples=[
|
198 |
"What are the penalties for public servants who conceal information?",
|
199 |
+
"What constitutes criminal conspiracy?",
|
200 |
+
"Explain the provisions related to culpable homicide",
|
201 |
+
"What are the penalties for causing death by negligence?",
|
202 |
+
"What are the punishments for corruption?"
|
203 |
],
|
204 |
inputs=query_input,
|
205 |
label="Example Queries"
|
206 |
)
|
207 |
|
|
|
208 |
def search(query, progress=gr.Progress()):
    """Gradio event handler: run *query* through the shared search bot.

    The ``progress`` parameter is declared here, on the function that is
    actually registered with Gradio, so that the framework can supply its
    progress tracker for this event; it is then forwarded to
    ``search_sections``, which reports the individual steps.

    Args:
        query: Text entered in the query box.
        progress: Progress tracker supplied by Gradio.

    Returns:
        The ``(raw sections, AI interpretation)`` pair from ``search_sections``.
    """
    return search_bot.search_sections(query, progress=progress)
|
210 |
+
|
211 |
+
def stop_search():
    """Gradio event handler for the stop button.

    Flags the shared search bot as cancelled and returns the two status
    messages shown in the result panes.
    """
    search_bot.cancel()
    sections_message = "Search cancelled."
    interpretation_message = "Search stopped by user."
    return sections_message, interpretation_message
|
214 |
|
|
|
215 |
search_button.click(
|
216 |
fn=search,
|
217 |
inputs=query_input,
|
218 |
+
outputs=[raw_output, ai_output],
|
219 |
+
cancels=[stop_button] # Cancel any ongoing search when stop is clicked
|
220 |
+
)
|
221 |
+
|
222 |
+
stop_button.click(
|
223 |
+
fn=stop_search,
|
224 |
+
outputs=[raw_output, ai_output],
|
225 |
+
cancels=[search_button] # Cancel the search button when stop is clicked
|
226 |
)
|
227 |
|
|
|
228 |
query_input.submit(
|
229 |
fn=search,
|
230 |
inputs=query_input,
|
231 |
+
outputs=[raw_output, ai_output],
|
232 |
+
cancels=[stop_button]
|
233 |
)
|
234 |
|
235 |
return iface
|
236 |
|
|
|
237 |
# Build the interface once; both launch paths below use the same instance.
demo = create_interface()

if __name__ == "__main__":
    # Executed as a script: start the server with default launch settings.
    demo.launch()
else:
    # Imported as a module: the server is still launched (share link off)
    # and the value returned by launch() is kept as ``app``.
    # NOTE(review): launching at import time is unusual - confirm that a
    # caller really relies on this before changing it.
    app = demo.launch(share=False)
|