Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ from astrapy.db import AstraDB
|
|
8 |
from dotenv import load_dotenv
|
9 |
from huggingface_hub import login
|
10 |
from sentence_transformers import SentenceTransformer
|
|
|
11 |
|
12 |
# Load environment variables
|
13 |
load_dotenv()
|
@@ -68,16 +69,45 @@ class LegalTextSearchBot:
|
|
68 |
# Generate embedding for the query
|
69 |
query_embedding = self.embedding_model.encode(query).tolist()
|
70 |
|
71 |
-
#
|
72 |
-
results = self.collection.vector_find(
|
73 |
query_embedding,
|
74 |
-
limit=5
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
except Exception as e:
|
78 |
print(f"Error searching AstraDB: {str(e)}")
|
79 |
return []
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
def search_sections(self, query: str) -> Tuple[str, str]:
|
82 |
"""Search legal sections and return both raw results and AI interpretation"""
|
83 |
try:
|
@@ -85,24 +115,24 @@ class LegalTextSearchBot:
|
|
85 |
search_results = self._search_astra(query)
|
86 |
|
87 |
if not search_results:
|
88 |
-
return "No relevant sections found.", ""
|
89 |
|
90 |
# Format raw results
|
91 |
raw_results = []
|
|
|
|
|
92 |
for result in search_results:
|
93 |
-
|
94 |
-
|
95 |
-
Chapter: {result.get('chapter_info', {}).get('title', 'N/A')}
|
96 |
-
|
97 |
-
Content:
|
98 |
-
{result.get('content', 'N/A')}
|
99 |
|
100 |
-
|
101 |
-
"""
|
102 |
-
|
|
|
|
|
103 |
|
104 |
-
# Combine
|
105 |
-
context = "\n\n".join(
|
106 |
|
107 |
# Generate AI interpretation
|
108 |
chain = self.prompt | self.llm
|
@@ -117,7 +147,9 @@ class LegalTextSearchBot:
|
|
117 |
return "\n".join(raw_results), ai_response
|
118 |
|
119 |
except Exception as e:
|
120 |
-
|
|
|
|
|
121 |
|
122 |
def create_interface():
|
123 |
"""Create the Gradio interface"""
|
|
|
8 |
from dotenv import load_dotenv
|
9 |
from huggingface_hub import login
|
10 |
from sentence_transformers import SentenceTransformer
|
11 |
+
import json
|
12 |
|
13 |
# Load environment variables
|
14 |
load_dotenv()
|
|
|
69 |
# Generate embedding for the query
|
70 |
query_embedding = self.embedding_model.encode(query).tolist()
|
71 |
|
72 |
+
# First try searching in searchable_text
|
73 |
+
results = list(self.collection.vector_find(
|
74 |
query_embedding,
|
75 |
+
limit=5,
|
76 |
+
fields=["section_number", "title", "chapter_info", "content", "searchable_text"]
|
77 |
+
))
|
78 |
+
|
79 |
+
if not results:
|
80 |
+
# If no results, try a more general search
|
81 |
+
results = list(self.collection.find(
|
82 |
+
{},
|
83 |
+
limit=5
|
84 |
+
))
|
85 |
+
|
86 |
+
return results
|
87 |
+
|
88 |
except Exception as e:
|
89 |
print(f"Error searching AstraDB: {str(e)}")
|
90 |
return []
|
91 |
|
92 |
+
def format_section(self, section: Dict) -> str:
    """Format a section record as a text block for display.

    Reads the ``section_number``, ``title``, ``chapter_info`` and
    ``content`` keys of ``section``. ``chapter_info`` contributes its own
    ``title`` only when it is a dict. Any field that is missing or set to
    ``None`` is rendered as ``N/A``.

    Args:
        section: Mapping describing one section.

    Returns:
        A multi-line string that starts with a newline, lists the section
        heading, chapter title and content, and ends with a line of 80
        ``=`` characters. If formatting fails for any reason (for example
        ``section`` has no ``get`` method), the error is printed and
        ``str(section)`` is returned instead.
    """
    placeholder = 'N/A'

    try:
        def field(key: str):
            # dict.get(key, default) only falls back when the key is absent;
            # a stored None would otherwise be rendered as the text "None".
            value = section.get(key)
            return placeholder if value is None else value

        chapter_info = section.get('chapter_info')
        chapter_title = placeholder
        if isinstance(chapter_info, dict):
            nested_title = chapter_info.get('title')
            if nested_title is not None:
                chapter_title = nested_title

        lines = [
            "",  # the block opens with a newline
            f"Section {field('section_number')}: {field('title')}",
            f"Chapter: {chapter_title}",
            "",
            "Content:",
            f"{field('content')}",
            "",
            "=" * 80,
            "",  # and closes with a trailing newline
        ]
        return "\n".join(lines)
    except Exception as e:
        # Best-effort display helper: never let a malformed record break
        # the caller; fall back to the raw representation.
        print(f"Error formatting section: {str(e)}")
        return str(section)
|
110 |
+
|
111 |
def search_sections(self, query: str) -> Tuple[str, str]:
|
112 |
"""Search legal sections and return both raw results and AI interpretation"""
|
113 |
try:
|
|
|
115 |
search_results = self._search_astra(query)
|
116 |
|
117 |
if not search_results:
|
118 |
+
return "No relevant sections found.", "I apologize, but I cannot find relevant sections in the database."
|
119 |
|
120 |
# Format raw results
|
121 |
raw_results = []
|
122 |
+
context_parts = []
|
123 |
+
|
124 |
for result in search_results:
|
125 |
+
# Format for display
|
126 |
+
raw_results.append(self.format_section(result))
|
|
|
|
|
|
|
|
|
127 |
|
128 |
+
# Add to context for AI
|
129 |
+
context_parts.append(f"""
|
130 |
+
Section {result.get('section_number')}: {result.get('title')}
|
131 |
+
{result.get('content', '')}
|
132 |
+
""")
|
133 |
|
134 |
+
# Combine context for AI
|
135 |
+
context = "\n\n".join(context_parts)
|
136 |
|
137 |
# Generate AI interpretation
|
138 |
chain = self.prompt | self.llm
|
|
|
147 |
return "\n".join(raw_results), ai_response
|
148 |
|
149 |
except Exception as e:
|
150 |
+
error_msg = f"Error processing query: {str(e)}"
|
151 |
+
print(error_msg)
|
152 |
+
return error_msg, "An error occurred while processing your query."
|
153 |
|
154 |
def create_interface():
|
155 |
"""Create the Gradio interface"""
|