Spaces:

lobsterScraper
/

LegalSearchEngine

Runtime error

App Files Files Community

lobsterScraper commited on Jan 23

Commit

8f7d06e

1 Parent(s): 199df2d

change analysis

Browse files

Files changed (1) hide show

app.py +1032 -116

app.py CHANGED Viewed

@@ -1,5 +1,1026 @@
 import os
 import gradio as gr
 from pinecone import Pinecone
@@ -892,131 +1913,26 @@ async def process_query_async(query: str, search_system: LegalSearchSystem, grap
     Asynchronous function to process both traditional and graph-based searches
     """
     if not query.strip():
-        return "Please enter a query", "", "", "", {}
     # Regular search (synchronous)
-    results = search_system.search(query)
     try:
         # Graph search (asynchronous)
         graph_results = await graph_search_system.process_legal_query(query)
         graph_documents = graph_results.get('documents', "Error processing graph search")
         graph_concepts = graph_results.get('related_concepts', {})
-    except Exception as e:
-        graph_documents = f"Error processing graph search: {str(e)}"
-        graph_concepts = {}
-    graph_data = graph_search_system.generate_document_graph(query)
-    graph_fig = create_graph_visualization(graph_data)
-    return (
-        results['status'],
-        results['documents'],
-        results['analysis'],
-        graph_documents,
-        graph_concepts,
-        graph_fig,
-        "Click on a node to view document content"
-    )
-def create_interface(graph_search_system: EnhancedLegalSearchSystem):
-    search_system = LegalSearchSystem()
-    with gr.Blocks(css="footer {display: none !important;}") as demo:
-        gr.Markdown("""
-        # Corporate Law Legal Search Engine
-        Enter your legal query below to search through documents and get an AI-powered analysis.Queires only related to corporate law will give relevant information
-        """)
-        with gr.Row():
-            query_input = gr.Textbox(
-                label="Legal Query",
-                placeholder="e.g., What are the key principles of contract law?",
-                lines=3
-            )
-        with gr.Row():
-            search_button = gr.Button("Search & Analyze")
-        status_output = gr.Textbox(
-            label="Status",
-            interactive=False
-        )
-        with gr.Tabs():
-            with gr.TabItem("Search Results"):
-                docs_output = gr.Markdown(
-                    label="Retrieved Documents",
-                    value="Search results will appear here..."
-                )
-            with gr.TabItem("AI Legal Analysis"):
-                summary_output = gr.Markdown(
-                    label="AI-Generated Legal Analysis",
-                    value="Analysis will appear here..."
-                )
-            with gr.TabItem("Retrieved Documents through Graph Rag"):
-                docs_output_graph = gr.Markdown(
-                    label="Source Documents",
-                    value="Search results will appear here..."
-                )
-                graph_analysis_output = gr.JSON(
-                    label="Related Concepts",
-                    value={}
-                )
-            with gr.TabItem("Knowledge Graph"):
-                # Graph visualization
-                graph_output = gr.Plot(
-                    label="Legal Knowledge Graph"
-                )
-                # Add text area for showing clicked document content
-                selected_doc_content = gr.Textbox(
-                    label="Selected Document Content",
-                    interactive=False,
-                    lines=10
-                )
-        def process_query(query):
-        # Create event loop if it doesn't exist
-            try:
-                loop = asyncio.get_event_loop()
-            except RuntimeError:
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-            # Run the async function and get results
-            return loop.run_until_complete(
-                process_query_async(query, search_system, graph_search_system)
-            )
-        search_button.click(
-            fn=process_query,
-            inputs=[query_input],
-            outputs=[
-                status_output,
-                docs_output,
-                summary_output,
-                docs_output_graph,
-                graph_analysis_output,
-                graph_output,
-                selected_doc_content
-            ]
         )
-    return demo
-if __name__ == "__main__":
-    graph_search_system = EnhancedLegalSearchSystem(
-        google_api_key="AIzaSyDWHGMd8a70RbL3EBenfUwimcAHjhvgM6M",
-        neo4j_url="neo4j+s://a63462d0.databases.neo4j.io",
-        neo4j_username="neo4j",
-        neo4j_password="nU8Ut5N8k7LmQzNPe7vUbpZxMirK8rHrmLuzPc2G_Zc"
-    )
-    demo = create_interface(graph_search_system)
-    demo.launch()

+# import os
+# import gradio as gr
+# from pinecone import Pinecone
+# from sentence_transformers import SentenceTransformer
+# from typing import List, Dict, Optional
+# from langchain_google_genai import ChatGoogleGenerativeAI
+# from langchain.chains.summarize import load_summarize_chain
+# from langchain.prompts import PromptTemplate, ChatPromptTemplate
+# from langchain.docstore.document import Document
+# import time
+# import asyncio
+# import plotly.graph_objects as go
+# from neo4j import GraphDatabase
+# import networkx as nx
+# from langchain_community.vectorstores import Neo4jVector
+# from langchain.chains.summarize import load_summarize_chain
+# from langchain.chains import LLMChain
+# from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+# class EnhancedLegalSearchSystem:
+#     def __init__(
+#         self,
+#         google_api_key: str,
+#         neo4j_url: str,
+#         neo4j_username: str,
+#         neo4j_password: str,
+#         embedding_model_name: str = "intfloat/e5-small-v2",
+#         device: str = "cpu"
+#     ):
+#         """Initialize the Enhanced Legal Search System"""
+#         # Initialize LLM
+#         self.llm = GoogleGenerativeAI(
+#             model="gemini-pro",
+#             google_api_key=google_api_key,
+#             temperature=0.1
+#         )
+#         # Initialize embeddings
+#         self.embeddings = GoogleGenerativeAIEmbeddings(
+#             model="models/embedding-001",
+#             google_api_key=google_api_key,
+#             task_type="retrieval_query"
+#         )
+#         # Initialize Neo4j connection
+#         self.neo4j_driver = GraphDatabase.driver(
+#             neo4j_url,
+#             auth=(neo4j_username, neo4j_password)
+#         )
+#         # Initialize vector store
+#         self.vector_store = Neo4jVector.from_existing_graph(
+#             embedding=self.embeddings,
+#             url=neo4j_url,
+#             username=neo4j_username,
+#             password=neo4j_password,
+#             node_label="Document",
+#             text_node_properties=["text"],
+#             embedding_node_property="embedding"
+#         )
+#         # Initialize additional embedding model for enhanced search
+#         self.local_embedding_model = SentenceTransformer(
+#             model_name_or_path=embedding_model_name,
+#             device=device
+#         )
+#         # Initialize prompts
+#         self.init_prompts()
+#     def __del__(self):
+#         """Cleanup Neo4j connection"""
+#         if hasattr(self, 'neo4j_driver'):
+#             self.neo4j_driver.close()
+#     def init_prompts(self):
+#         """Initialize enhanced prompts for legal analysis"""
+#         self.qa_prompt = ChatPromptTemplate.from_messages([
+#             ("system", """You are a legal expert assistant specializing in Indian law.
+#                          Analyze the following legal context and provide a detailed, structured answer to the question.
+#                          Include specific sections, rules, and precedents where applicable.
+#                          Format your response with clear headings and bullet points for better readability.
+#                          Context: {context}"""),
+#             ("human", "Question: {question}")
+#         ])
+#         self.map_prompt = PromptTemplate(
+#             template="""
+#             Analyze the following legal text segment:
+#             TEXT: "{text}"
+#             Instructions:
+#             1. Extract and summarize the key legal points
+#             2. Maintain all legal terminology exactly as written
+#             3. Preserve section numbers and references
+#             4. Keep all specific conditions and requirements
+#             5. Include any mentioned time periods or deadlines
+#             KEY POINTS:
+#             """,
+#             input_variables=["text"]  # Removed page_number as it's not used in the template
+#         )
+#         self.combine_prompt = PromptTemplate(
+#             template="""
+#             Question: {question}
+#             Using ONLY the information from the following legal document excerpts, provide a comprehensive answer:
+#             {text}
+#             Instructions:
+#             1. Base your response EXCLUSIVELY on the provided document excerpts
+#             2. If the documents don't contain enough information to fully answer the question, explicitly state what's missing
+#             3. Use direct quotes when appropriate
+#             4. Organize the response by relevant sections found in the documents
+#             5. If there are conflicting statements across documents, highlight them
+#             ANALYSIS:
+#             """,
+#             input_variables=["text", "question"]
+#         )
+#         # Initialize summarize chain
+#         self.chain = load_summarize_chain(
+#             llm=self.llm,
+#             chain_type="map_reduce",
+#             map_prompt=self.map_prompt,
+#             combine_prompt=self.combine_prompt,
+#             verbose=True
+#         )
+#     def get_related_legal_entities(self, query: str) -> List[Dict]:
+#         """Retrieve related legal entities and their relationships"""
+#         # Corrected Cypher query to handle aggregation properly
+#         cypher_query = """
+#                     // First, let's check if nodes exist and get their labels
+# MATCH (d:Document)
+# WHERE toLower(d.text) CONTAINS toLower($query)
+# WITH d
+# // Match all relationships from the document, collecting their types
+# OPTIONAL MATCH (d)-[r]-(connected)
+# WHERE NOT connected:Document  // Avoid direct document-to-document relations
+# WITH d,
+#      collect(DISTINCT type(r)) as relationTypes,
+#      collect(DISTINCT labels(connected)) as connectedLabels
+# // Now use these to build our main query
+# MATCH (d:Document)-[r1]-(e)
+# WHERE toLower(d.text) CONTAINS toLower($query)
+#   AND NOT e:Document  // Exclude direct document connections
+# WITH d, r1, e
+# // Get secondary connections, but be more specific about what we're looking for
+# OPTIONAL MATCH (e)-[r2]-(related)
+# WHERE (related:Entity OR related:Concept OR related:Section OR related:Case)
+#   AND related <> d  // Prevent cycles back to original document
+# WITH d, {
+#     source_id: id(d),
+#     source_text: d.text,
+#     document_type: COALESCE(d.type, "Unknown"),
+#     relationship_type: type(r1),
+#     entity: {
+#         id: id(e),
+#         type: CASE WHEN e:Entity THEN "Entity"
+#                   WHEN e:Concept THEN "Concept"
+#                   WHEN e:Section THEN "Section"
+#                   WHEN e:Case THEN "Case"
+#                   ELSE "Other" END,
+#         text: COALESCE(e.text, e.name, e.title, "Unnamed"),
+#         properties: properties(e)
+#     },
+#     related_entities: collect(DISTINCT {
+#         id: id(related),
+#         type: CASE WHEN related:Entity THEN "Entity"
+#                   WHEN related:Concept THEN "Concept"
+#                   WHEN related:Section THEN "Section"
+#                   WHEN related:Case THEN "Case"
+#                   ELSE "Other" END,
+#         relationship: type(r2),
+#         text: COALESCE(related.text, related.name, related.title, "Unnamed"),
+#         properties: properties(related)
+#     })
+# } as result
+# WHERE result.entity.text IS NOT NULL  // Filter out any results with null entity text
+# RETURN DISTINCT result
+# ORDER BY result.source_id, result.entity.id
+# LIMIT 25
+#         """
+#         try:
+#             with self.neo4j_driver.session() as session:
+#                 # Execute the improved query
+#                 result = session.run(cypher_query, {"query": query})
+#                 entities = [record["result"] for record in result]
+#                 # Log the results for debugging
+#                 print(f"Found {len(entities)} related entities")
+#                 if entities:
+#                     for entity in entities:
+#                         print(f"Entity: {entity['entity']['text']}")
+#                         print(f"Source: {entity['source_text'][:100]}...")
+#                         print(f"Related: {len(entity['related_entities'])} connections")
+#                 return entities
+#         except Exception as e:
+#             print(f"Error in get_related_legal_entities: {str(e)}")
+#             return []
+#     async def process_legal_query(
+#         self,
+#         question: str,
+#         top_k: int = 5,
+#         context_window: int = 1
+#     ) -> Dict[str, any]:
+#         """Process a legal query using both graph and vector search capabilities"""
+#         try:
+#             # 1. Perform semantic search
+#             semantic_results = self.vector_store.similarity_search(
+#                 question,
+#                 k=top_k,
+#                 search_type="hybrid"
+#             )
+#             # 2. Get related legal entities with the full question context
+#             related_entities = self.get_related_legal_entities(question)
+#             # Log the counts for debugging
+#             print(f"Found {len(semantic_results)} semantic results")
+#             print(f"Found {len(related_entities)} related entities")
+#             # 3. Expand context with related documents
+#             expanded_results = self.expand_context(
+#                 semantic_results,
+#                 context_window
+#             )
+#             # 4. Generate comprehensive answer
+#             documents = self._process_results(expanded_results, semantic_results)
+#             # 5. Prepare context for LLM
+#             context = self._prepare_context(documents, related_entities)
+#             # 6. Generate answer using LLM
+#             chain = LLMChain(llm=self.llm, prompt=self.qa_prompt)
+#             response = await chain.ainvoke({
+#                 "context": context,
+#                 "question": question
+#             })
+#             answer = response.get('text', '')
+#             # 7. Return structured response with explicit related concepts
+#             return {
+#                 "status": "Success",
+#                 "answer": answer,
+#                 "documents": self._format_documents(documents),
+#                 "related_concepts": related_entities,  # This should now contain data
+#                 "source_ids": sorted(list(set(doc.metadata.get('document_id', 'unknown') for doc in documents))),
+#                 "context_info": {
+#                     "direct_matches": len([d for d in documents if d.metadata.get('context_type') == "DIRECT MATCH"]),
+#                     "context_chunks": len([d for d in documents if d.metadata.get('context_type') == "CONTEXT"])
+#                 }
+#             }
+#         except Exception as e:
+#             print(f"Error in process_legal_query: {str(e)}")  # Add error logging
+#             return {
+#                 "status": f"Error: {str(e)}",
+#                 "answer": "An error occurred while processing your query.",
+#                 "documents": "",
+#                 "related_concepts": [],
+#                 "source_ids": [],
+#                 "context_info": {}
+#             }
+#     def expand_context(
+#         self,
+#         initial_results: List[Document],
+#         context_window: int
+#     ) -> List[Document]:
+#         """Expand context around search results"""
+#         expanded_results = []
+#         seen_ids = set()
+#         for doc in initial_results:
+#             doc_id = doc.metadata.get('document_id', doc.page_content[:50])
+#             if doc_id not in seen_ids:
+#                 # Query for related documents
+#                 context_results = self.vector_store.similarity_search(
+#                     doc.page_content,
+#                     k=2 * context_window + 1,
+#                     search_type="hybrid"
+#                 )
+#                 for result in context_results:
+#                     result_id = result.metadata.get('document_id', result.page_content[:50])
+#                     if result_id not in seen_ids:
+#                         expanded_results.append(result)
+#                         seen_ids.add(result_id)
+#         return expanded_results
+#     def _process_results(self, expanded_results: List[Document], initial_results: List[Document]) -> List[Document]:
+#         """Process and deduplicate search results"""
+#         seen_ids = set()
+#         documents = []
+#         for doc in expanded_results:
+#             doc_id = doc.metadata.get('document_id', doc.page_content[:50])
+#             if doc_id not in seen_ids:
+#                 seen_ids.add(doc_id)
+#                 is_direct_match = any(
+#                     r.metadata.get('document_id', r.page_content[:50]) == doc_id
+#                     for r in initial_results
+#                 )
+#                 doc.metadata['context_type'] = (
+#                     "DIRECT MATCH" if is_direct_match else "CONTEXT"
+#                 )
+#                 documents.append(doc)
+#         return sorted(
+#             documents,
+#             key=lambda x: x.metadata.get('document_id', 'unknown')
+#         )
+#     def _prepare_context(
+#         self,
+#         documents: List[Document],
+#         related_entities: List[Dict]
+#     ) -> str:
+#         """Prepare context for LLM processing"""
+#         context = "\n\nLegal Documents:\n" + "\n".join([
+#             f"[Document ID: {doc.metadata.get('document_id', 'unknown')}] {doc.page_content}"
+#             for doc in documents
+#         ])
+#         if related_entities:
+#             context += "\n\nRelated Legal Concepts and Relationships:\n"
+#             for entity in related_entities:
+#                 context += f"\n• {entity.get('entity', '')}"
+#                 if entity.get('related_entities'):
+#                     for related in entity['related_entities']:
+#                         if related.get('entity'):
+#                             context += f"\n  - {related['type']}: {related['entity']}"
+#         return context
+#     def _format_documents(self, documents: List[Document]) -> str:
+#         """Format documents as markdown"""
+#         markdown = "### Retrieved Documents\n\n"
+#         for i, doc in enumerate(documents, 1):
+#             markdown += (
+#                 f"**Document {i}** "
+#                 f"(ID: {doc.metadata.get('document_id', 'unknown')}, "
+#                 f"{doc.metadata.get('context_type', 'UNKNOWN')})\n"
+#                 f"```\n{doc.page_content}\n```\n\n"
+#             )
+#         return markdown
+#     def generate_document_graph(
+#         self,
+#         query: str,
+#         top_k: int = 5,
+#         similarity_threshold: float = 0.5
+#     ) -> List[Dict]:
+#         """Generate graph data based on document similarity and relationships"""
+#         try:
+#             # 1. Get initial semantic search results
+#             semantic_results = self.vector_store.similarity_search(
+#                 query,
+#                 k=top_k,
+#                 search_type="hybrid"
+#             )
+#             # 2. Get embeddings for all documents
+#             doc_texts = [doc.page_content for doc in semantic_results]
+#             doc_embeddings = self.local_embedding_model.encode(doc_texts)
+#             # 3. Create graph data structure
+#             graph_data = []
+#             seen_docs = set()
+#             # First, add all documents as nodes
+#             for i, doc in enumerate(semantic_results):
+#                 doc_id = doc.metadata.get('document_id', f'doc_{i}')
+#                 if doc_id not in seen_docs:
+#                     seen_docs.add(doc_id)
+#                     doc_type = doc.metadata.get('type', 'document')
+#                     # Create node entry
+#                     graph_data.append({
+#                         'source_id': doc_id,
+#                         'source_text': doc.page_content[:200],  # Truncate for display
+#                         'document_type': doc_type,
+#                         'entity': {
+#                             'id': doc_id,
+#                             'type': 'Document',
+#                             'text': f"Document {i + 1}",
+#                             'properties': {
+#                                 'similarity': 1.0,
+#                                 'length': len(doc.page_content)
+#                             }
+#                         },
+#                         'related_entities': []
+#                     })
+#             # Add relationships based on similarity
+#             from sklearn.metrics.pairwise import cosine_similarity
+#             similarity_matrix = cosine_similarity(doc_embeddings)
+#             # Create relationships between similar documents
+#             for i in range(len(semantic_results)):
+#                 related = []
+#                 for j in range(len(semantic_results)):
+#                     if i != j and similarity_matrix[i][j] > similarity_threshold:
+#                         doc_j = semantic_results[j]
+#                         doc_j_id = doc_j.metadata.get('document_id', f'doc_{j}')
+#                         related.append({
+#                             'id': doc_j_id,
+#                             'type': 'Document',
+#                             'relationship': 'similar_to',
+#                             'text': f"Document {j + 1}",
+#                             'properties': {
+#                                 'similarity_score': float(similarity_matrix[i][j])
+#                             }
+#                         })
+#                 # Add related documents to the graph data
+#                 if related:
+#                     graph_data[i]['related_entities'] = related
+#             return graph_data
+#         except Exception as e:
+#             print(f"Error generating document graph: {str(e)}")
+#             return []
+# def create_graph_visualization(graph_data: List[Dict]) -> go.Figure:
+#     """Create an interactive graph visualization using Plotly"""
+#     if not graph_data:
+#         return go.Figure(layout=go.Layout(title='No documents found'))
+#     # Initialize graph
+#     G = nx.Graph()
+#     # Color mapping
+#     color_map = {
+#         'Document': '#3B82F6',  # blue
+#         'Section': '#10B981',   # green
+#         'Reference': '#F59E0B'  # yellow
+#     }
+#     # Node information storage
+#     node_colors = []
+#     node_texts = []
+#     node_hovers = []  # Full text for hover
+#     nodes_added = set()
+#     # Process nodes and edges
+#     for data in graph_data:
+#         source_id = data['source_id']
+#         source_text = data['source_text']
+#         # Add main document node
+#         if source_id not in nodes_added:
+#             G.add_node(source_id)
+#             node_colors.append(color_map['Document'])
+#             # Short text for display
+#             node_texts.append(f"Doc {len(nodes_added)+1}")
+#             # Full text for hover/click
+#             node_hovers.append(f"Document {len(nodes_added)+1}:<br><br>{source_text}")
+#             nodes_added.add(source_id)
+#         # Process related documents
+#         for related in data.get('related_entities', []):
+#             related_id = related['id']
+#             similarity = related['properties'].get('similarity_score', 0.0)
+#             if related_id not in nodes_added:
+#                 G.add_node(related_id)
+#                 node_colors.append(color_map['Document'])
+#                 node_texts.append(f"Doc {len(nodes_added)+1}")
+#                 node_hovers.append(f"Document {len(nodes_added)+1}:<br><br>{related['text']}")
+#                 nodes_added.add(related_id)
+#             # Add edge with similarity weight
+#             G.add_edge(
+#                 source_id,
+#                 related_id,
+#                 weight=similarity,
+#                 relationship=f"Similarity: {similarity:.2f}"
+#             )
+#     # Create layout
+#     pos = nx.spring_layout(G, k=2.0, iterations=50)
+#     # Create edge trace
+#     edge_x = []
+#     edge_y = []
+#     edge_text = []
+#     for edge in G.edges(data=True):
+#         x0, y0 = pos[edge[0]]
+#         x1, y1 = pos[edge[1]]
+#         # Create curved line
+#         mid_x = (x0 + x1) / 2
+#         mid_y = (y0 + y1) / 2
+#         # Add some curvature
+#         mid_x += (y1 - y0) * 0.1
+#         mid_y -= (x1 - x0) * 0.1
+#         # Add points for curved line
+#         edge_x.extend([x0, mid_x, x1, None])
+#         edge_y.extend([y0, mid_y, y1, None])
+#         edge_text.append(edge[2]['relationship'])
+#     edge_trace = go.Scatter(
+#         x=edge_x,
+#         y=edge_y,
+#         line=dict(width=1.5, color='#9CA3AF'),
+#         hoverinfo='text',
+#         text=edge_text,
+#         mode='lines'
+#     )
+#     # Create node trace
+#     node_x = []
+#     node_y = []
+#     for node in G.nodes():
+#         x, y = pos[node]
+#         node_x.append(x)
+#         node_y.append(y)
+#     node_trace = go.Scatter(
+#         x=node_x,
+#         y=node_y,
+#         mode='markers+text',
+#         hoverinfo='text',
+#         text=node_texts,
+#         hovertext=node_hovers,  # Full text shown on hover
+#         textposition="top center",
+#         marker=dict(
+#             size=30,
+#             color=node_colors,
+#             line=dict(width=2, color='white'),
+#             symbol='circle'
+#         ),
+#         customdata=node_hovers  # Store full text for click events
+#     )
+#     # Create figure with updated layout
+#     fig = go.Figure(
+#         data=[edge_trace, node_trace],
+#         layout=go.Layout(
+#             title={
+#                 'text': 'Document Similarity Graph<br><sub>Click nodes to view full text</sub>',
+#                 'y': 0.95,
+#                 'x': 0.5,
+#                 'xanchor': 'center',
+#                 'yanchor': 'top'
+#             },
+#             showlegend=False,
+#             hovermode='closest',
+#             margin=dict(b=20, l=5, r=5, t=60),
+#             xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+#             yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+#             plot_bgcolor='white',
+#             width=800,
+#             height=600,
+#             clickmode='event+select'  # Enable click events
+#         )
+#     )
+#     return fig
+# def create_interface(search_system: EnhancedLegalSearchSystem):
+#     """Create Gradio interface with interactive graph"""
+#     with gr.Blocks(css="footer {display: none !important;}") as demo:
+#         gr.Markdown("""
+#         # Enhanced Legal Search System
+#         Enter your legal query below to search through documents and get an AI-powered analysis.
+#         This system combines graph-based and semantic search capabilities for comprehensive legal research.
+#         """)
+#         with gr.Row():
+#             query_input = gr.Textbox(
+#                 label="Legal Query",
+#                 placeholder="e.g., What are the reporting obligations for banks under the Money Laundering Act?",
+#                 lines=3
+#             )
+#         with gr.Row():
+#             search_button = gr.Button("Search & Analyze")
+#         status_output = gr.Textbox(
+#             label="Status",
+#             interactive=False
+#         )
+#         with gr.Tabs():
+#             with gr.TabItem("AI Legal Analysis"):
+#                 analysis_output = gr.Markdown(
+#                     label="AI-Generated Legal Analysis",
+#                     value="Analysis will appear here..."
+#                 )
+#             with gr.TabItem("Retrieved Documents"):
+#                 docs_output = gr.Markdown(
+#                     label="Source Documents",
+#                     value="Search results will appear here..."
+#                 )
+#             with gr.TabItem("Related Concepts"):
+#                 concepts_output = gr.Json(
+#                     label="Related Legal Concepts",
+#                     value={}
+#                 )
+#             with gr.TabItem("Knowledge Graph"):
+#                 # Graph visualization
+#                 graph_output = gr.Plot(
+#                     label="Legal Knowledge Graph"
+#                 )
+#                 # Add text area for showing clicked document content
+#                 selected_doc_content = gr.Textbox(
+#                     label="Selected Document Content",
+#                     interactive=False,
+#                     lines=10
+#                 )
+#         async def process_query(query):
+#             if not query.strip():
+#                 return (
+#                     "Please enter a query",
+#                     "No analysis available",
+#                     "No documents available",
+#                     {},
+#                     None,
+#                     ""
+#                 )
+#             results = await search_system.process_legal_query(query)
+#             graph_data = search_system.generate_document_graph(query)
+#             graph_fig = create_graph_visualization(graph_data)
+#             return (
+#                 results['status'],
+#                 results['answer'],
+#                 results['documents'],
+#                 {"related_concepts": results['related_concepts']},
+#                 graph_fig,
+#                 "Click on a node to view document content"
+#             )
+#         search_button.click(
+#             fn=process_query,
+#             inputs=[query_input],
+#             outputs=[
+#                 status_output,
+#                 analysis_output,
+#                 docs_output,
+#                 concepts_output,
+#                 graph_output,
+#                 selected_doc_content
+#             ]
+#         )
+#     return demo
+# class LegalSearchSystem:
+#     def __init__(
+#         self,
+#         pinecone_api_key: str = "pcsk_43sajZ_MjcXR2yN5cAcVi8RARyB6i3NP3wLTnTLugbUcN9cUU4q5EfNmuwLPkmxAvykk9o",
+#         google_api_key: str = "AIzaSyDWHGMd8a70RbL3EBenfUwimcAHjhvgM6M",
+#         environment: str = "us-east-1",
+#         index_name: str = "pdf-embeddings",
+#         dimension: int = 384,
+#         embedding_model_name: str = "intfloat/e5-small-v2",
+#         device: str = "cpu"
+#     ):
+#         # Initialize Pinecone
+#         self.pc = Pinecone(api_key=pinecone_api_key)
+#         # Initialize LangChain with Gemini
+#         self.llm = ChatGoogleGenerativeAI(
+#             model="gemini-pro",
+#             temperature=0,
+#             google_api_key=google_api_key
+#         )
+#         # Initialize prompts
+#         self.map_prompt = PromptTemplate(
+#             template="""
+#             Analyze the following legal text segment and extract key information:
+#             TEXT: "{text}"
+#             Instructions:
+#             1. Maintain all legal terminology exactly as written
+#             2. Preserve section numbers and references
+#             3. Keep all specific conditions and requirements
+#             4. Include any mentioned time periods or deadlines
+#             DETAILED ANALYSIS:
+#             """,
+#             input_variables=["text"]
+#         )
+#         self.combine_prompt = PromptTemplate(
+#             template="""
+#             Based on the following excerpts from legal documents and the question: "{question}"
+#             EXCERPTS:
+#             {text}
+#             Instructions:
+#             1. Synthesize a comprehensive answer that connects relevant sections
+#             2. Maintain precise legal language from the source material
+#             3. Reference specific sections and subsections where applicable
+#             4. If there are seemingly disconnected pieces of information, explain their relationship
+#             5. Highlight any conditions or exceptions that span multiple excerpts
+#             COMPREHENSIVE LEGAL ANALYSIS:
+#             """,
+#             input_variables=["text", "question"]
+#         )
+#         # Initialize chain
+#         self.chain = load_summarize_chain(
+#             llm=self.llm,
+#             chain_type="stuff",
+#             prompt=self.combine_prompt,
+#             verbose=True
+#         )
+#         # Initialize Pinecone index and embedding model
+#         self.index = self.pc.Index(index_name)
+#         self.embedding_model = SentenceTransformer(
+#             model_name_or_path=embedding_model_name,
+#             device=device
+#         )
+#     def search(self, query_text: str, top_k: int = 5, context_window: int = 1) -> Dict:
+#         """
+#         Perform a search and analysis of the legal query.
+#         """
+#         try:
+#             # Get search results with context
+#             results = self.query_and_summarize(
+#                 query_text=query_text,
+#                 top_k=top_k,
+#                 context_window=context_window
+#             )
+#             # Format the results for display
+#             docs_markdown = self._format_documents(results['raw_results'])
+#             return {
+#                 'status': "Search completed successfully",
+#                 'documents': docs_markdown,
+#                 'analysis': results['summary'],
+#                 'source_pages': results['source_pages'],
+#                 'context_info': results['context_info']
+#             }
+#         except Exception as e:
+#             return {
+#                 'status': f"Error during search: {str(e)}",
+#                 'documents': "Error retrieving documents",
+#                 'analysis': "Error generating analysis",
+#                 'source_pages': [],
+#                 'context_info': {}
+#             }
+#     def query_and_summarize(
+#         self,
+#         query_text: str,
+#         top_k: int = 5,
+#         filter: Optional[Dict] = None,
+#         context_window: int = 1
+#     ) -> Dict:
+#         """
+#         Query Pinecone and generate a summary with enhanced context handling.
+#         """
+#         # Generate embedding for query
+#         query_embedding = self.embedding_model.encode(query_text).tolist()
+#         # Query Pinecone
+#         initial_results = self.index.query(
+#             vector=query_embedding,
+#             top_k=top_k,
+#             include_metadata=True,
+#             filter=filter
+#         )['matches']
+#         # Expand context
+#         expanded_results = []
+#         for match in initial_results:
+#             page_num = match['metadata']['page_number']
+#             context_filter = {
+#                 "page_number": {
+#                     "$gte": max(1, page_num - context_window),
+#                     "$lte": page_num + context_window
+#                 }
+#             }
+#             if filter:
+#                 context_filter.update(filter)
+#             context_results = self.index.query(
+#                 vector=self.embedding_model.encode(match['metadata']['text']).tolist(),
+#                 top_k=2 * context_window + 1,
+#                 include_metadata=True,
+#                 filter=context_filter
+#             )['matches']
+#             expanded_results.extend(context_results)
+#         # Process results and generate summary
+#         documents = self._process_results(expanded_results, initial_results)
+#         summary = self.chain.run(
+#             input_documents=documents,
+#             question=query_text
+#         )
+#         return {
+#             'raw_results': expanded_results,
+#             'summary': summary,
+#             'source_pages': list(set(doc.metadata['page_number'] for doc in documents)),
+#             'context_info': {
+#                 'direct_matches': len([d for d in documents if d.metadata['context_type'] == "DIRECT MATCH"]),
+#                 'context_chunks': len([d for d in documents if d.metadata['context_type'] == "CONTEXT"])
+#             }
+#         }
+#     def _process_results(self, expanded_results: List[Dict], initial_results: List[Dict]) -> List[Document]:
+#         """
+#         Process and deduplicate search results.
+#         """
+#         seen_ids = set()
+#         documents = []
+#         for result in expanded_results:
+#             if result['id'] not in seen_ids:
+#                 seen_ids.add(result['id'])
+#                 is_direct_match = any(r['id'] == result['id'] for r in initial_results)
+#                 documents.append(Document(
+#                     page_content=result['metadata']['text'],
+#                     metadata={
+#                         'score': result['score'],
+#                         'page_number': result['metadata']['page_number'],
+#                         'context_type': "DIRECT MATCH" if is_direct_match else "CONTEXT"
+#                     }
+#                 ))
+#         return sorted(documents, key=lambda x: x.metadata['page_number'])
+#     def _format_documents(self, results: List[Dict]) -> str:
+#         """
+#         Format search results as markdown.
+#         """
+#         markdown = "### Retrieved Documents\n\n"
+#         for i, result in enumerate(results, 1):
+#             markdown += f"**Document {i}** (Page {result['metadata']['page_number']})\n"
+#             markdown += f"```\n{result['metadata']['text']}\n```\n\n"
+#         return markdown
+# async def process_query_async(query: str, search_system: LegalSearchSystem, graph_search_system: EnhancedLegalSearchSystem):
+#     """
+#     Asynchronous function to process both traditional and graph-based searches
+#     """
+#     if not query.strip():
+#         return "Please enter a query", "", "", "", {}
+#     # Regular search (synchronous)
+#     results = search_system.search(query)
+#     try:
+#         # Graph search (asynchronous)
+#         graph_results = await graph_search_system.process_legal_query(query)
+#         graph_documents = graph_results.get('documents', "Error processing graph search")
+#         graph_concepts = graph_results.get('related_concepts', {})
+#     except Exception as e:
+#         graph_documents = f"Error processing graph search: {str(e)}"
+#         graph_concepts = {}
+#     graph_data = graph_search_system.generate_document_graph(query)
+#     graph_fig = create_graph_visualization(graph_data)
+#     return (
+#         results['status'],
+#         results['documents'],
+#         results['analysis'],
+#         graph_documents,
+#         graph_concepts,
+#         graph_fig,
+#         "Click on a node to view document content"
+#     )
+# def create_interface(graph_search_system: EnhancedLegalSearchSystem):
+#     search_system = LegalSearchSystem()
+#     with gr.Blocks(css="footer {display: none !important;}") as demo:
+#         gr.Markdown("""
+#         # Corporate Law Legal Search Engine
+#         Enter your legal query below to search through documents and get an AI-powered analysis.Queires only related to corporate law will give relevant information
+#         """)
+#         with gr.Row():
+#             query_input = gr.Textbox(
+#                 label="Legal Query",
+#                 placeholder="e.g., What are the key principles of contract law?",
+#                 lines=3
+#             )
+#         with gr.Row():
+#             search_button = gr.Button("Search & Analyze")
+#         status_output = gr.Textbox(
+#             label="Status",
+#             interactive=False
+#         )
+#         with gr.Tabs():
+#             with gr.TabItem("Search Results"):
+#                 docs_output = gr.Markdown(
+#                     label="Retrieved Documents",
+#                     value="Search results will appear here..."
+#                 )
+#             with gr.TabItem("AI Legal Analysis"):
+#                 summary_output = gr.Markdown(
+#                     label="AI-Generated Legal Analysis",
+#                     value="Analysis will appear here..."
+#                 )
+#             with gr.TabItem("Retrieved Documents through Graph Rag"):
+#                 docs_output_graph = gr.Markdown(
+#                     label="Source Documents",
+#                     value="Search results will appear here..."
+#                 )
+#                 graph_analysis_output = gr.JSON(
+#                     label="Related Concepts",
+#                     value={}
+#                 )
+#             with gr.TabItem("Knowledge Graph"):
+#                 # Graph visualization
+#                 graph_output = gr.Plot(
+#                     label="Legal Knowledge Graph"
+#                 )
+#                 # Add text area for showing clicked document content
+#                 selected_doc_content = gr.Textbox(
+#                     label="Selected Document Content",
+#                     interactive=False,
+#                     lines=10
+#                 )
+#         def process_query(query):
+#         # Create event loop if it doesn't exist
+#             try:
+#                 loop = asyncio.get_event_loop()
+#             except RuntimeError:
+#                 loop = asyncio.new_event_loop()
+#                 asyncio.set_event_loop(loop)
+#             # Run the async function and get results
+#             return loop.run_until_complete(
+#                 process_query_async(query, search_system, graph_search_system)
+#             )
+#         search_button.click(
+#             fn=process_query,
+#             inputs=[query_input],
+#             outputs=[
+#                 status_output,
+#                 docs_output,
+#                 summary_output,
+#                 docs_output_graph,
+#                 graph_analysis_output,
+#                 graph_output,
+#                 selected_doc_content
+#             ]
+#         )
+#     return demo
+# if __name__ == "__main__":
+#     graph_search_system = EnhancedLegalSearchSystem(
+#         google_api_key="AIzaSyDWHGMd8a70RbL3EBenfUwimcAHjhvgM6M",
+#         neo4j_url="neo4j+s://a63462d0.databases.neo4j.io",
+#         neo4j_username="neo4j",
+#         neo4j_password="nU8Ut5N8k7LmQzNPe7vUbpZxMirK8rHrmLuzPc2G_Zc"
+#     )
+#     demo = create_interface(graph_search_system)
+#     demo.launch()
 import os
 import gradio as gr
 from pinecone import Pinecone
     Asynchronous function to process both traditional and graph-based searches
     """
     if not query.strip():
+        return "Please enter a query", "", "", "", {}, None, ""
     # Regular search (synchronous)
+    rag_results = search_system.search(query)
     try:
         # Graph search (asynchronous)
         graph_results = await graph_search_system.process_legal_query(query)
         graph_documents = graph_results.get('documents', "Error processing graph search")
         graph_concepts = graph_results.get('related_concepts', {})
+        # Prepare graph context for LLM
+        graph_context = graph_search_system._prepare_context(
+            [Document(page_content=d, metadata={}) for d in graph_documents.split("```\n\n```")], # This is a crude split, needs improvement
+            graph_concepts['related_concepts'] if isinstance(graph_concepts, dict) and 'related_concepts' in graph_concepts else []
         )
+    except Exception as e:
+        graph_documents = f"Error processing graph search: {str(e)}"
+        graph_concepts = {}
+        graph_context = ""
+    graph_data = graph_search_system.generate_document_graph(query)