File size: 8,200 Bytes
5f5f8de
38dd749
5f5f8de
 
 
 
 
 
 
38dd749
a53e1b6
5f5f8de
 
 
 
 
 
 
38dd749
5f5f8de
 
 
 
 
 
 
 
 
 
38dd749
 
 
5f5f8de
 
 
 
 
 
 
 
 
 
 
 
38dd749
5f5f8de
 
38dd749
 
5f5f8de
38dd749
 
 
5f5f8de
38dd749
5f5f8de
 
 
 
 
 
 
 
 
 
 
38dd749
5f5f8de
38dd749
 
 
a53e1b6
 
38dd749
a53e1b6
 
 
 
 
 
 
 
 
 
 
 
 
5f5f8de
 
 
 
a53e1b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38dd749
 
5f5f8de
38dd749
5f5f8de
 
 
a53e1b6
38dd749
 
 
a53e1b6
 
38dd749
a53e1b6
 
38dd749
a53e1b6
 
 
 
 
5f5f8de
a53e1b6
 
5f5f8de
38dd749
5f5f8de
38dd749
5f5f8de
 
 
 
 
38dd749
 
 
5f5f8de
 
a53e1b6
 
 
5f5f8de
38dd749
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f5f8de
38dd749
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import gradio as gr
from typing import List, Dict, Tuple
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import pipeline
import os
from astrapy.db import AstraDB
from dotenv import load_dotenv
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
import json

# Load credentials (AstraDB, Hugging Face) from a local .env file so they
# are reachable via os.getenv() throughout this module.
load_dotenv()

# Authenticate with the Hugging Face Hub up front; needed to download the
# models used below.
hf_token = os.getenv("HUGGINGFACE_API_TOKEN")
login(token=hf_token)

class LegalTextSearchBot:
    """Retrieval-augmented search over Indian legal sections stored in AstraDB.

    Queries are embedded with a sentence-transformer, matched against a
    vector collection, and the retrieved sections are fed to a local
    TinyLlama chat model that produces a grounded interpretation.
    """

    # Cap on the accumulated chat-history string so the prompt cannot grow
    # without bound across many queries in one session.
    _MAX_HISTORY_CHARS = 4000

    def __init__(self):
        # Connect to AstraDB; credentials come from environment variables
        # (loaded from .env at module import time).
        self.astra_db = AstraDB(
            token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
            api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT")
        )

        # Collection holding the pre-embedded legal sections.
        self.collection = self.astra_db.collection(os.getenv("ASTRA_DB_COLLECTION"))

        # Query-embedding model; must match the model used to embed the
        # stored sections or vector search results will be meaningless.
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

        # Local generation pipeline. do_sample=True is required for
        # temperature/top_p to take effect: transformers ignores sampling
        # parameters (and emits a warning) under the default greedy decoding.
        pipe = pipeline(
            "text-generation",
            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.15
        )
        self.llm = HuggingFacePipeline(pipeline=pipe)

        # Prompt template constraining the model to the retrieved sections.
        self.template = """
        IMPORTANT: You are a legal assistant that provides accurate information based on the Indian legal sections provided in the context.
        
        STRICT RULES:
        1. Base your response ONLY on the provided legal sections
        2. If you cannot find relevant information, respond with: "I apologize, but I cannot find information about that in the legal database."
        3. Do not make assumptions or use external knowledge
        4. Always cite the specific section numbers you're referring to
        5. Be precise and accurate in your legal interpretations
        6. If quoting from the sections, use quotes and cite the section number
        
        Context (Legal Sections): {context}
        
        Chat History: {chat_history}
        
        Question: {question}
        
        Answer:"""
        
        self.prompt = ChatPromptTemplate.from_template(self.template)
        # Running transcript injected into each prompt (bounded; see
        # _MAX_HISTORY_CHARS).
        self.chat_history = ""

    def _search_astra(self, query: str) -> List[Dict]:
        """Return up to 5 legal-section documents relevant to *query*.

        Performs a vector search using the query embedding; if that yields
        nothing, falls back to fetching the first 5 documents (best effort).
        Returns an empty list on any error.
        """
        try:
            # Embed the query with the same model used for the stored docs.
            query_embedding = self.embedding_model.encode(query).tolist()

            results = list(self.collection.vector_find(
                query_embedding,
                limit=5,
                fields=["section_number", "title", "chapter_info", "content", "searchable_text"]
            ))

            if not results:
                # Fallback: unfiltered fetch so the UI still shows something.
                # NOTE(review): these documents are NOT relevance-ranked.
                results = list(self.collection.find(
                    {},
                    limit=5
                ))

            return results

        except Exception as e:
            # Boundary handler: log and degrade to "no results" rather than
            # crash the UI callback.
            print(f"Error searching AstraDB: {str(e)}")
            return []

    def format_section(self, section: Dict) -> str:
        """Render a section document as human-readable display text.

        Missing fields are shown as 'N/A'; on any formatting error the raw
        document repr is returned so the result is never lost.
        """
        try:
            chapter_info = section.get('chapter_info', {})
            # chapter_info may be a dict or some other shape in the DB; only
            # trust it when it is a dict.
            chapter_title = chapter_info.get('title', 'N/A') if isinstance(chapter_info, dict) else 'N/A'

            return f"""
Section {section.get('section_number', 'N/A')}: {section.get('title', 'N/A')}
Chapter: {chapter_title}

Content:
{section.get('content', 'N/A')}

{"="*80}
"""
        except Exception as e:
            print(f"Error formatting section: {str(e)}")
            return str(section)

    def search_sections(self, query: str) -> Tuple[str, str]:
        """Search legal sections and return (raw results, AI interpretation).

        Both elements are display-ready strings; errors are reported in the
        returned strings rather than raised, since this feeds a UI callback.
        """
        try:
            search_results = self._search_astra(query)

            if not search_results:
                return "No relevant sections found.", "I apologize, but I cannot find relevant sections in the database."

            # Build the display text and the LLM context in one pass.
            raw_results = []
            context_parts = []

            for result in search_results:
                raw_results.append(self.format_section(result))

                context_parts.append(f"""
Section {result.get('section_number')}: {result.get('title')}
{result.get('content', '')}
""")

            context = "\n\n".join(context_parts)

            # Generate the grounded interpretation.
            chain = self.prompt | self.llm
            ai_response = chain.invoke({
                "context": context,
                "chat_history": self.chat_history,
                "question": query
            })

            self.chat_history += f"\nUser: {query}\nAI: {ai_response}\n"
            # Trim to the most recent tail so prompts stay bounded.
            if len(self.chat_history) > self._MAX_HISTORY_CHARS:
                self.chat_history = self.chat_history[-self._MAX_HISTORY_CHARS:]

            return "\n".join(raw_results), ai_response

        except Exception as e:
            error_msg = f"Error processing query: {str(e)}"
            print(error_msg)
            return error_msg, "An error occurred while processing your query."

def create_interface():
    """Build and return the Gradio Blocks app for the legal search system."""
    with gr.Blocks(title="Legal Text Search System", theme=gr.themes.Soft()) as app:
        gr.Markdown("""
        # 📚 Legal Text Search System
        
        This system allows you to search through Indian legal sections and get both:
        1. 📜 Raw section contents that match your query
        2. 🤖 AI-powered interpretation of the relevant sections
        
        Enter your legal query below:
        """)

        # One bot instance is shared by every event handler of this app.
        bot = LegalTextSearchBot()

        with gr.Row():
            query_box = gr.Textbox(
                label="Your Query",
                placeholder="e.g., What are the penalties for public servants who conceal information?",
                lines=2
            )

        with gr.Row():
            go_button = gr.Button("🔍 Search Legal Sections", variant="primary")

        with gr.Row():
            with gr.Column():
                sections_out = gr.Textbox(
                    label="📜 Relevant Legal Sections",
                    lines=15,
                    max_lines=30
                )
            with gr.Column():
                interpretation_out = gr.Textbox(
                    label="🤖 AI Interpretation",
                    lines=15,
                    max_lines=30
                )

        # Clickable sample queries that pre-fill the input box.
        gr.Examples(
            examples=[
                "What are the penalties for public servants who conceal information?",
                "What is the punishment for corruption?",
                "What happens if a public servant fails to prevent an offense?",
                "What are the legal consequences for concealing design to commit offence?",
                "Explain the duties and responsibilities of public servants"
            ],
            inputs=query_box,
            label="Example Queries"
        )

        def run_query(user_query):
            # Delegate to the bot; it returns (raw sections, interpretation).
            sections, interpretation = bot.search_sections(user_query)
            return sections, interpretation

        # Both the button click and pressing Enter trigger the same search.
        for trigger in (go_button.click, query_box.submit):
            trigger(
                fn=run_query,
                inputs=query_box,
                outputs=[sections_out, interpretation_out]
            )

    return app

# Script entry point: build the Gradio app and start serving it.
if __name__ == "__main__":
    create_interface().launch()