Update app2.py
app2.py CHANGED
@@ -10,6 +10,7 @@ import PyPDF2
 import io
 import random
 import docx
+import os
 from docx import Document
 from docx.shared import Inches
 from datetime import datetime
@@ -47,8 +48,7 @@ except ImportError:
     st.error("Missing dependency: streamlit_lottie. Please install it using 'pip install streamlit-lottie'")
     st.stop()
 
-
-AI71_API_KEY = "api71-api-92fc2ef9-9f3c-47e5-a019-18e257b04af2"
+AI71_API_KEY = os.getenv('AI71_API_KEY')
 
 # Initialize AI71 client
 try:
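Replacing the committed key with `os.getenv('AI71_API_KEY')` is the substantive change here (and the reason for the new `import os` above). A minimal sketch of the pattern, assuming a Streamlit app that should halt with a readable error when the variable is unset:

```python
import os

import streamlit as st

# Read the key from the environment instead of committing it to source control.
AI71_API_KEY = os.getenv("AI71_API_KEY")

if not AI71_API_KEY:
    # Fail fast rather than passing None to the client constructor below.
    st.error("AI71_API_KEY is not set. Export it before launching the app.")
    st.stop()
```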
@@ -57,7 +57,6 @@ except Exception as e:
     st.error(f"Failed to initialize AI71 client: {str(e)}")
     st.stop()
 
-# Initialize chat history and other session state variables
 if "chat_history" not in st.session_state:
     st.session_state.chat_history = []
 if "uploaded_documents" not in st.session_state:
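The deleted comment described what the surrounding `if` blocks still do: seed default values in `st.session_state`. An equivalent, slightly more compact idiom, sketched with only the two keys the diff confirms (any further keys in the app are not shown here):

```python
import streamlit as st

# Defaults confirmed by the diff; the real app may initialize more keys.
SESSION_DEFAULTS = {
    "chat_history": [],
    "uploaded_documents": [],
}

for key, default in SESSION_DEFAULTS.items():
    if key not in st.session_state:
        st.session_state[key] = default
```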
@@ -114,7 +113,7 @@ def get_ai_response(prompt: str) -> str:
     except Exception as e:
         print(f"Streaming failed, falling back to non-streaming request. Error: {e}")
         try:
-            #
+            # Fall back to a non-streaming request
             completion = ai71.chat.completions.create(
                 model="tiiuae/falcon-180b-chat",
                 messages=messages,
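The new comment names the pattern in this hunk: try a streaming completion first and, if it raises, retry without streaming. A hedged sketch of that shape (the `ai71` client's OpenAI-style `create` call and the model name come from the diff; the `stream` flag and chunk layout are assumptions):

```python
def get_completion(ai71, messages):
    """Try a streaming request first; fall back to a blocking one on failure."""
    try:
        chunks = ai71.chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=messages,
            stream=True,  # assumed flag, mirroring OpenAI-style clients
        )
        # Assumed OpenAI-style chunk shape: choices[0].delta.content
        return "".join(chunk.choices[0].delta.content or "" for chunk in chunks)
    except Exception as e:
        print(f"Streaming failed, falling back to non-streaming request. Error: {e}")
        completion = ai71.chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=messages,
        )
        return completion.choices[0].message.content
```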
@@ -177,7 +176,7 @@ def search_web(query: str, num_results: int = 3) -> List[Dict[str, str]]:
     results = []
     if "items" in res:
         for item in res["items"]:
-            # Check if the result is relevant
+            # Check if the result is relevant
             if any(keyword in item["title"].lower() or keyword in item["snippet"].lower()
                    for keyword in ["law", "legal", "court", "case", "attorney", "lawyer"]):
                 result = {
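The re-added comment marks a keyword filter: a result is kept only if its title or snippet mentions a legal term. The predicate in isolation, runnable as-is:

```python
LEGAL_KEYWORDS = ["law", "legal", "court", "case", "attorney", "lawyer"]

def is_relevant(item: dict) -> bool:
    """True if any legal keyword appears in the item's title or snippet."""
    text = f"{item.get('title', '')} {item.get('snippet', '')}".lower()
    return any(keyword in text for keyword in LEGAL_KEYWORDS)

assert is_relevant({"title": "Smith v. Jones", "snippet": "appellate court ruling"})
assert not is_relevant({"title": "Banana bread", "snippet": "an easy recipe"})
```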
@@ -229,7 +228,7 @@ def perform_web_search(query: str) -> List[Dict[str, Any]]:
             "cost_estimates": cost_estimates
         })
 
-    return results[:3]  # Return top 3 results with cost estimates
+    return results[:3]  # Return top 3 results with their cost estimates
 
 def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
     """Performs a comprehensive analysis of the document, including web and Wikipedia searches."""
@@ -246,7 +245,7 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
 
         return {
             "document_analysis": document_analysis,
-            "related_articles": web_results or [],  # Ensure this is always a list
+            "related_articles": web_results or [],  # Ensure that this is always a list
             "wikipedia_summary": wiki_results
         }
     except Exception as e:
@@ -259,7 +258,7 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
 
 def search_wikipedia(query: str, sentences: int = 2) -> Dict[str, str]:
     try:
-        #
+        # Ensures that the query is a string before slicing
        truncated_query = str(query)[:300]
 
         # Search Wikipedia
@@ -276,7 +275,6 @@ def search_wikipedia(query: str, sentences: int = 2) -> Dict[str, str]:
         summary = wikipedia.summary(page.title, sentences=sentences, auto_suggest=False)
         return {"summary": summary, "url": page.url, "title": page.title}
     except wikipedia.exceptions.DisambiguationError as e:
-        # If it's a disambiguation page, choose the first option
         try:
             page = wikipedia.page(e.options[0], auto_suggest=False)
             summary = wikipedia.summary(page.title, sentences=sentences, auto_suggest=False)
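The deleted comment explained the fallback that the code still performs: on a `DisambiguationError`, retry with the first suggested title. Extracted as a standalone sketch against the `wikipedia` package:

```python
import wikipedia

def summarize(query: str, sentences: int = 2) -> dict:
    """Summarize a Wikipedia page, taking the first option on disambiguation."""
    try:
        page = wikipedia.page(query, auto_suggest=False)
    except wikipedia.exceptions.DisambiguationError as e:
        # The exception carries the candidate titles; pick the first one.
        page = wikipedia.page(e.options[0], auto_suggest=False)
    summary = wikipedia.summary(page.title, sentences=sentences, auto_suggest=False)
    return {"summary": summary, "url": page.url, "title": page.title}
```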
@@ -454,7 +452,7 @@ def query_public_case_law(query: str) -> List[Dict[str, Any]]:
 
     justia_results = justia_soup.find_all('div', class_='g')
 
-    for result in justia_results[:5]:  #
+    for result in justia_results[:5]:  # Limit to the top 5 results
         title_elem = result.find('h3')
         link_elem = result.find('a')
         snippet_elem = result.find('div', class_='VwiC3b')
@@ -464,7 +462,7 @@ def query_public_case_law(query: str) -> List[Dict[str, Any]]:
         link = link_elem['href']
         snippet = snippet_elem.text
 
-        #
+        # Extract case name and citation from the title
         case_info = title.split(' - ')
         if len(case_info) >= 2:
             case_name = case_info[0]
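The new comment documents the title parsing: result titles of the form "Case Name - Citation" are split on the separator. A minimal illustration:

```python
def parse_case_title(title: str):
    """Split a 'Case Name - Citation' style title into its two parts."""
    case_info = title.split(' - ')
    if len(case_info) >= 2:
        return case_info[0], case_info[1]
    return title, None  # No citation present; keep the raw title.

print(parse_case_title("Miranda v. Arizona - 384 U.S. 436 (1966)"))
# ('Miranda v. Arizona', '384 U.S. 436 (1966)')
```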
@@ -518,7 +516,6 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
         analysis_prompt = f"Analyze the following legal document and provide a summary, potential issues, and key clauses:\n\n{content}"
         document_analysis = get_ai_response(analysis_prompt)
 
-        # Extract main topics or keywords from the document
         topic_extraction_prompt = f"Extract the main topics or keywords from the following document summary:\n\n{document_analysis}"
         topics = get_ai_response(topic_extraction_prompt)
 
@@ -527,7 +524,7 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
 
         return {
             "document_analysis": document_analysis,
-            "related_articles": web_results or [],
+            "related_articles": web_results or [],
             "wikipedia_summary": wiki_results
         }
     except Exception as e:
@@ -647,7 +644,6 @@ def search_web_duckduckgo(query: str, num_results: int = 3, max_retries: int = 3
 
     service = build("customsearch", "v1", developerKey=api_key)
 
-    # Execute the search request
     res = service.cse().list(q=query, cx=cse_id, num=num_results).execute()
 
     results = []
@@ -963,7 +959,7 @@ def case_trend_visualizer_ui():
     # Display the Plotly chart
     st.plotly_chart(fig, use_container_width=True)
 
-    #
+    # Display Statistics
     st.subheader("Case Statistics")
     total_cases = df['Number of Cases'].sum()
    avg_cases = df['Number of Cases'].mean()
@@ -975,13 +971,13 @@ def case_trend_visualizer_ui():
     col2.metric("Average Cases per Year", f"{avg_cases:,.0f}")
     col3.metric("Peak Year", f"{max_year}")
 
-    #
+    # Trend Description
     st.write("Trend Description:", get_trend_description(df))
 
     if st.session_state.current_data is not None:
         df = st.session_state.current_data
 
-        #
+        # Interactive Analysis Section
         st.subheader("Interactive Analysis")
 
         # Year-over-Year Change
@@ -998,11 +994,11 @@ def case_trend_visualizer_ui():
         ma_fig = px.line(df, x='Year', y=['Number of Cases', 'Moving Average'], title=f"{window}-Year Moving Average")
         st.plotly_chart(ma_fig, use_container_width=True)
 
-        #
+        # Raw Data
         st.subheader("Raw Data")
         st.dataframe(df)
 
-        #
+        # Download Options
         csv = df.to_csv(index=False)
         st.download_button(
             label="Download data as CSV",
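The moving-average figure in this hunk implies a rolling mean over the yearly counts. A sketch of how the `Moving Average` column is typically built with pandas (the column names and chart call follow the diff; the sample data and `window` value are invented for illustration):

```python
import pandas as pd
import plotly.express as px

df = pd.DataFrame({
    "Year": [2019, 2020, 2021, 2022, 2023],
    "Number of Cases": [120, 150, 130, 170, 160],
})

window = 3  # illustrative; the app presumably lets the user choose this
df["Moving Average"] = df["Number of Cases"].rolling(window=window, min_periods=1).mean()

ma_fig = px.line(df, x="Year", y=["Number of Cases", "Moving Average"],
                 title=f"{window}-Year Moving Average")
```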
@@ -1011,7 +1007,7 @@ def case_trend_visualizer_ui():
             mime="text/csv",
         )
 
-        #
+        # Additional Information & Data Sources
         st.subheader("Additional Information")
         info = get_additional_info(case_type)
         st.markdown(info)
@@ -1346,7 +1342,7 @@ class LegalDataRetriever:
         if author_elem:
             author = author_elem.text.strip()
         elif judges and judges[0] != "Not available":
-            author = judges[0]
+            author = judges[0]
 
         if author == "Not available":
             self.logger.warning("No author found in the HTML structure, searching in text content")
@@ -1365,24 +1361,21 @@ class LegalDataRetriever:
         return author
 
     def extract_court_opinion(self, soup):
-        # Target the article tag with class col-sm-9 first
         article_div = soup.find('article', class_='col-sm-9')
         if not article_div:
             self.logger.error("Could not find the main article div (col-sm-9).")
             return "Case details not available (main article div not found)."
 
-        # Find the tab-content div within the article div
         opinion_div = article_div.find('div', class_='tab-content')
         if not opinion_div:
             self.logger.error("Could not find the case details content (tab-content div).")
             return "Case details not available (tab-content div not found)."
 
-        # Extract all text from the tab-content div
         case_details = opinion_div.get_text(separator='\n', strip=True)
 
         # Clean up the text
-        case_details = re.sub(r'\n+', '\n', case_details)
-        case_details = re.sub(r'\s+', ' ', case_details)
+        case_details = re.sub(r'\n+', '\n', case_details)
+        case_details = re.sub(r'\s+', ' ', case_details)
 
         return case_details
 
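One caveat on the re-added cleanup lines: `\s` matches newlines, so the second substitution collapses everything the first one produced and the result is a single line. If the intent is to keep line breaks, narrowing the second pattern does it, as in this sketch:

```python
import re

def clean_text(case_details: str) -> str:
    """Collapse blank lines and runs of spaces/tabs while keeping newlines."""
    case_details = re.sub(r'\n+', '\n', case_details)    # squeeze blank lines
    case_details = re.sub(r'[ \t]+', ' ', case_details)  # spaces/tabs only
    return case_details.strip()

print(clean_text("Opinion\n\n\nof   the\tcourt"))
# Opinion
# of the court
```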
@@ -1563,7 +1556,6 @@ def automated_legal_brief_generation_ui():
         mime="text/plain"
     )
 # --- Streamlit App ---
-# Custom CSS to improve the overall look
 st.markdown("""
 <style>
 .reportview-container {
@@ -1776,8 +1768,7 @@ elif feature == "Case Information Retrieval":
 
 elif feature == "Automated Legal Brief Generation":
     automated_legal_brief_generation_ui()
-
-# Footer
+
 st.markdown("---")
 st.markdown(
     """