veerukhannan committed on
Commit
38dd749
·
verified ·
1 Parent(s): b3413e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -36
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from typing import List, Dict
3
  from langchain_core.prompts import ChatPromptTemplate
4
  from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
5
  from transformers import pipeline
@@ -7,6 +7,7 @@ import os
7
  from astrapy.db import AstraDB
8
  from dotenv import load_dotenv
9
  from huggingface_hub import login
 
10
 
11
  # Load environment variables
12
  load_dotenv()
@@ -14,7 +15,7 @@ load_dotenv()
14
  # Login to Hugging Face Hub
15
  login(token=os.getenv("HUGGINGFACE_API_TOKEN"))
16
 
17
- class AstraDBChatbot:
18
  def __init__(self):
19
  # Initialize AstraDB connection
20
  self.astra_db = AstraDB(
@@ -25,7 +26,9 @@ class AstraDBChatbot:
25
  # Set your collection
26
  self.collection = self.astra_db.collection(os.getenv("ASTRA_DB_COLLECTION"))
27
 
28
- # Initialize the model - using a smaller model suitable for CPU
 
 
29
  pipe = pipeline(
30
  "text-generation",
31
  model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
@@ -38,16 +41,17 @@ class AstraDBChatbot:
38
 
39
  # Create prompt template
40
  self.template = """
41
- IMPORTANT: You are a helpful assistant that provides information based on the retrieved context.
42
 
43
  STRICT RULES:
44
- 1. Base your response ONLY on the provided context
45
- 2. If you cannot find relevant information, respond with: "I apologize, but I cannot find information about that in the database."
46
  3. Do not make assumptions or use external knowledge
47
- 4. Be concise and accurate in your responses
48
- 5. If quoting from the context, clearly indicate it
 
49
 
50
- Context: {context}
51
 
52
  Chat History: {chat_history}
53
 
@@ -59,55 +63,138 @@ class AstraDBChatbot:
59
  self.chat_history = ""
60
 
61
  def _search_astra(self, query: str) -> List[Dict]:
62
- """Search AstraDB for relevant documents"""
63
  try:
 
 
 
64
  # Perform vector search in AstraDB
65
  results = self.collection.vector_find(
66
- query,
67
- limit=5 # Adjust the limit based on your needs
68
  )
69
- return results
70
  except Exception as e:
71
  print(f"Error searching AstraDB: {str(e)}")
72
  return []
73
 
74
- def chat(self, query: str, history) -> str:
75
- """Process a query and return a response"""
76
  try:
77
- # Search AstraDB for relevant content
78
  search_results = self._search_astra(query)
79
 
80
  if not search_results:
81
- return "I apologize, but I cannot find information about that in the database."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- # Extract and combine relevant content from search results
84
  context = "\n\n".join([result.get('content', '') for result in search_results])
85
 
86
- # Generate response using LLM
87
  chain = self.prompt | self.llm
88
- result = chain.invoke({
89
  "context": context,
90
  "chat_history": self.chat_history,
91
  "question": query
92
  })
93
 
94
- self.chat_history += f"\nUser: {query}\nAI: {result}\n"
 
 
95
 
96
- return result
97
  except Exception as e:
98
- return f"Error processing query: {str(e)}"
99
-
100
- # Initialize the chatbot
101
- chatbot = AstraDBChatbot()
102
 
103
- # Create the Gradio interface
104
- iface = gr.ChatInterface(
105
- chatbot.chat,
106
- title="AstraDB-powered Q&A Chatbot",
107
- description="Ask questions and get answers from your AstraDB database.",
108
- examples=["What information do you have about this topic?", "Can you tell me more about specific details?"],
109
- theme=gr.themes.Soft()
110
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- # Launch the interface
113
- iface.launch()
 
 
 
1
  import gradio as gr
2
+ from typing import List, Dict, Tuple
3
  from langchain_core.prompts import ChatPromptTemplate
4
  from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
5
  from transformers import pipeline
 
7
  from astrapy.db import AstraDB
8
  from dotenv import load_dotenv
9
  from huggingface_hub import login
10
+ from sentence_transformers import SentenceTransformer
11
 
12
  # Load environment variables
13
  load_dotenv()
 
15
  # Login to Hugging Face Hub
16
  login(token=os.getenv("HUGGINGFACE_API_TOKEN"))
17
 
18
+ class LegalTextSearchBot:
19
  def __init__(self):
20
  # Initialize AstraDB connection
21
  self.astra_db = AstraDB(
 
26
  # Set your collection
27
  self.collection = self.astra_db.collection(os.getenv("ASTRA_DB_COLLECTION"))
28
 
29
+ # Initialize the models
30
+ self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
31
+
32
  pipe = pipeline(
33
  "text-generation",
34
  model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
 
41
 
42
  # Create prompt template
43
  self.template = """
44
+ IMPORTANT: You are a legal assistant that provides accurate information based on the Indian legal sections provided in the context.
45
 
46
  STRICT RULES:
47
+ 1. Base your response ONLY on the provided legal sections
48
+ 2. If you cannot find relevant information, respond with: "I apologize, but I cannot find information about that in the legal database."
49
  3. Do not make assumptions or use external knowledge
50
+ 4. Always cite the specific section numbers you're referring to
51
+ 5. Be precise and accurate in your legal interpretations
52
+ 6. If quoting from the sections, use quotes and cite the section number
53
 
54
+ Context (Legal Sections): {context}
55
 
56
  Chat History: {chat_history}
57
 
 
63
  self.chat_history = ""
64
 
65
  def _search_astra(self, query: str) -> List[Dict]:
66
+ """Search AstraDB for relevant legal sections"""
67
  try:
68
+ # Generate embedding for the query
69
+ query_embedding = self.embedding_model.encode(query).tolist()
70
+
71
  # Perform vector search in AstraDB
72
  results = self.collection.vector_find(
73
+ query_embedding,
74
+ limit=5
75
  )
76
+ return list(results)
77
  except Exception as e:
78
  print(f"Error searching AstraDB: {str(e)}")
79
  return []
80
 
81
+ def search_sections(self, query: str) -> Tuple[str, str]:
82
+ """Search legal sections and return both raw results and AI interpretation"""
83
  try:
84
+ # Search AstraDB for relevant sections
85
  search_results = self._search_astra(query)
86
 
87
  if not search_results:
88
+ return "No relevant sections found.", ""
89
+
90
+ # Format raw results
91
+ raw_results = []
92
+ for result in search_results:
93
+ section_info = f"""
94
+ Section {result.get('section_number')}: {result.get('title')}
95
+ Chapter: {result.get('chapter_info', {}).get('title', 'N/A')}
96
+
97
+ Content:
98
+ {result.get('content', 'N/A')}
99
+
100
+ {"="*80}
101
+ """
102
+ raw_results.append(section_info)
103
 
104
+ # Combine relevant content for AI interpretation
105
  context = "\n\n".join([result.get('content', '') for result in search_results])
106
 
107
+ # Generate AI interpretation
108
  chain = self.prompt | self.llm
109
+ ai_response = chain.invoke({
110
  "context": context,
111
  "chat_history": self.chat_history,
112
  "question": query
113
  })
114
 
115
+ self.chat_history += f"\nUser: {query}\nAI: {ai_response}\n"
116
+
117
+ return "\n".join(raw_results), ai_response
118
 
 
119
  except Exception as e:
120
+ return f"Error processing query: {str(e)}", ""
 
 
 
121
 
122
def create_interface():
    """Build and return the Gradio Blocks UI for the legal search system."""
    # One search bot per interface; connects to AstraDB and loads the models.
    bot = LegalTextSearchBot()

    with gr.Blocks(title="Legal Text Search System", theme=gr.themes.Soft()) as iface:
        gr.Markdown("""
        # 📚 Legal Text Search System
        
        This system allows you to search through Indian legal sections and get both:
        1. 📜 Raw section contents that match your query
        2. 🤖 AI-powered interpretation of the relevant sections
        
        Enter your legal query below:
        """)

        # --- Inputs -------------------------------------------------------
        with gr.Row():
            query_input = gr.Textbox(
                label="Your Query",
                placeholder="e.g., What are the penalties for public servants who conceal information?",
                lines=2
            )

        with gr.Row():
            search_button = gr.Button("🔍 Search Legal Sections", variant="primary")

        # --- Outputs: raw sections and AI reading, side by side -----------
        with gr.Row():
            with gr.Column():
                raw_output = gr.Textbox(
                    label="📜 Relevant Legal Sections",
                    lines=15,
                    max_lines=30
                )
            with gr.Column():
                ai_output = gr.Textbox(
                    label="🤖 AI Interpretation",
                    lines=15,
                    max_lines=30
                )

        gr.Examples(
            examples=[
                "What are the penalties for public servants who conceal information?",
                "What is the punishment for corruption?",
                "What happens if a public servant fails to prevent an offense?",
                "What are the legal consequences for concealing design to commit offence?",
                "Explain the duties and responsibilities of public servants"
            ],
            inputs=query_input,
            label="Example Queries"
        )

        # The bot already returns (raw, ai), so it wires to both outputs
        # directly — same handler for the button click and Enter key.
        outputs = [raw_output, ai_output]
        search_button.click(fn=bot.search_sections, inputs=query_input, outputs=outputs)
        query_input.submit(fn=bot.search_sections, inputs=query_input, outputs=outputs)

    return iface
196
 
197
# Entry point: build the UI and start the Gradio server when run as a script.
if __name__ == "__main__":
    app = create_interface()
    app.launch()