Johan713 committed on
Commit
de53a8b
·
verified ·
1 Parent(s): 448e443

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +275 -42
app2.py CHANGED
@@ -11,10 +11,11 @@ from docx import Document
11
  from docx.shared import Inches
12
  from datetime import datetime
13
  import re
 
14
  import base64
15
  from typing import List, Dict, Any
16
  import matplotlib.pyplot as plt
17
- from bs4 import BeautifulSoup
18
  from io import StringIO
19
  import wikipedia
20
  from googleapiclient.discovery import build
@@ -22,6 +23,18 @@ from typing import List, Optional
22
  from httpx_sse import SSEError
23
  from difflib import SequenceMatcher
24
  from datetime import datetime
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  # Error handling for optional dependencies
27
  try:
@@ -1035,37 +1048,142 @@ def get_trend_description(df):
1035
  else:
1036
  return "The number of cases has remained relatively stable over the five-year period."
1037
 
 
 
 
 
 
 
1038
  class LegalDataRetriever:
1039
  def __init__(self):
1040
  self.session = requests.Session()
1041
  self.session.headers.update({
1042
- 'User-Agent': 'LegalResearchBot/1.0 (https://www.lexai.com/bot; support@lexai.com)'
 
 
1043
  })
 
 
1044
 
1045
  def search_courtlistener(self, query: str) -> Dict[str, Any]:
1046
  """
1047
  Search CourtListener for case information.
1048
  """
1049
  url = f"https://www.courtlistener.com/api/rest/v3/search/?q={query}&type=o&format=json"
1050
- response = self.session.get(url)
1051
-
1052
- if response.status_code != 200:
1053
- return {"error": "Failed to retrieve data from CourtListener"}
 
 
 
 
 
 
 
1054
 
1055
- data = response.json()
1056
  if data['count'] == 0:
1057
  return {"error": "No results found"}
1058
 
1059
  result = data['results'][0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1060
  return {
1061
  "case_name": result['caseName'],
1062
  "date_filed": result['dateFiled'],
1063
- "docket_number": result['docketNumber'],
1064
  "court": result['court'],
1065
- "status": result['status'],
1066
- "url": f"https://www.courtlistener.com{result['absolute_url']}",
 
 
 
1067
  }
1068
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1069
  def search_justia(self, query: str) -> Dict[str, Any]:
1070
  """
1071
  Search Justia for case information.
@@ -1090,48 +1208,161 @@ class LegalDataRetriever:
1090
  "url": first_result.find('a')['href'],
1091
  }
1092
 
1093
- def get_case_information(query: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1094
  retriever = LegalDataRetriever()
1095
 
1096
- # Determine if the query is a case number
1097
- case_number_pattern = r'\d{1,2}:\d{2}-[a-zA-Z]{2,4}-\d{1,5}'
1098
- is_case_number = re.match(case_number_pattern, query)
1099
-
1100
  # Search CourtListener
1101
  cl_info = retriever.search_courtlistener(query)
1102
  if "error" not in cl_info:
1103
- return f"""
1104
- Case Information from CourtListener:
1105
- Case Name: {cl_info['case_name']}
1106
- Date Filed: {cl_info['date_filed']}
1107
- Docket Number: {cl_info['docket_number']}
1108
- Court: {cl_info['court']}
1109
- Status: {cl_info['status']}
1110
- More Info: {cl_info['url']}
1111
- """
1112
 
1113
- # Search Justia
1114
  justia_info = retriever.search_justia(query)
1115
  if "error" not in justia_info:
1116
- return f"""
1117
- Case Information from Justia:
1118
- Case Name: {justia_info['case_name']}
1119
- Citation: {justia_info['citation']}
1120
- Summary: {justia_info['summary']}
1121
- More Info: {justia_info['url']}
1122
- """
1123
 
1124
- return "Unable to find case information from available sources."
1125
 
1126
- def case_info_retriever():
1127
- st.subheader("Case Information Retriever")
1128
- query = st.text_input("Enter case name, number, or any relevant information:")
1129
- if st.button("Retrieve Case Information"):
1130
- with st.spinner("Retrieving case information..."):
1131
- result = get_case_information(query)
1132
- st.markdown(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1133
 
 
1134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1135
  # --- Streamlit App ---
1136
  # Custom CSS to improve the overall look
1137
  st.markdown("""
@@ -1180,12 +1411,12 @@ st.title("Lex AI - Advanced Legal Assistant")
1180
 
1181
  # Sidebar with feature selection
1182
  with st.sidebar:
1183
- st.title(" AI")
1184
  st.subheader("Advanced Legal Assistant")
1185
 
1186
  feature = st.selectbox(
1187
  "Select a feature",
1188
- ["Legal Chatbot", "Document Analysis", "Case Precedent Finder", "Legal Cost Estimator", "Contract Analysis", "Case Trend Visualizer", "Case Information Retrieval"]
1189
  )
1190
  if feature == "Legal Chatbot":
1191
  st.subheader("Legal Chatbot")
@@ -1348,6 +1579,8 @@ elif feature == "Case Trend Visualizer":
1348
  elif feature == "Case Information Retrieval":
1349
  case_info_retriever()
1350
 
 
 
1351
  # Add a footer with a disclaimer
1352
  # Footer
1353
  st.markdown("---")
 
11
  from docx.shared import Inches
12
  from datetime import datetime
13
  import re
14
+ import logging
15
  import base64
16
  from typing import List, Dict, Any
17
  import matplotlib.pyplot as plt
18
+ from bs4 import BeautifulSoup, NavigableString, Tag
19
  from io import StringIO
20
  import wikipedia
21
  from googleapiclient.discovery import build
 
23
  from httpx_sse import SSEError
24
  from difflib import SequenceMatcher
25
  from datetime import datetime
26
+ import spacy
27
+ import time
28
+ from selenium import webdriver
29
+ from selenium.webdriver.chrome.options import Options
30
+ from selenium.webdriver.common.by import By
31
+ from selenium.webdriver.support.ui import WebDriverWait
32
+ from selenium.webdriver.support import expected_conditions as EC
33
+ from sklearn.feature_extraction.text import TfidfVectorizer
34
+ from sklearn.metrics.pairwise import cosine_similarity
35
+ import networkx as nx
36
+
37
+ nlp = spacy.load("en_core_web_sm")
38
 
39
  # Error handling for optional dependencies
40
  try:
 
1048
  else:
1049
  return "The number of cases has remained relatively stable over the five-year period."
1050
 
1051
+ from bs4 import BeautifulSoup, NavigableString, Tag
1052
+ import requests
1053
+ import time
1054
+ import logging
1055
+ import re
1056
+
1057
  class LegalDataRetriever:
1058
  def __init__(self):
1059
  self.session = requests.Session()
1060
  self.session.headers.update({
1061
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
1062
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
1063
+ 'Accept-Language': 'en-US,en;q=0.5',
1064
  })
1065
+ logging.basicConfig(level=logging.DEBUG)
1066
+ self.logger = logging.getLogger(__name__)
1067
 
1068
  def search_courtlistener(self, query: str) -> Dict[str, Any]:
1069
  """
1070
  Search CourtListener for case information.
1071
  """
1072
  url = f"https://www.courtlistener.com/api/rest/v3/search/?q={query}&type=o&format=json"
1073
+ for attempt in range(3): # Retry up to 3 times
1074
+ try:
1075
+ response = self.session.get(url)
1076
+ response.raise_for_status()
1077
+ data = response.json()
1078
+ break
1079
+ except (requests.RequestException, ValueError) as e:
1080
+ self.logger.error(f"Attempt {attempt + 1} failed: {e}")
1081
+ if attempt == 2:
1082
+ return {"error": f"Failed to retrieve or parse data from CourtListener: {e}"}
1083
+ time.sleep(2) # Wait before retrying
1084
 
 
1085
  if data['count'] == 0:
1086
  return {"error": "No results found"}
1087
 
1088
  result = data['results'][0]
1089
+ case_url = f"https://www.courtlistener.com{result['absolute_url']}"
1090
+
1091
+ try:
1092
+ case_response = self.session.get(case_url)
1093
+ case_response.raise_for_status()
1094
+ soup = BeautifulSoup(case_response.text, 'html.parser')
1095
+ except requests.RequestException as e:
1096
+ self.logger.error(f"Failed to retrieve case page: {e}")
1097
+ return {"error": f"Failed to retrieve case page: {e}"}
1098
+
1099
+ judges = self.extract_judges(soup)
1100
+ author = self.extract_author(soup, judges)
1101
+ court_opinion = self.extract_court_opinion(soup)
1102
+
1103
  return {
1104
  "case_name": result['caseName'],
1105
  "date_filed": result['dateFiled'],
1106
+ "docket_number": result.get('docketNumber', 'Not available'),
1107
  "court": result['court'],
1108
+ "status": result.get('status', 'Not available'),
1109
+ "url": case_url,
1110
+ "judges": judges,
1111
+ "author": author,
1112
+ "court_opinion": court_opinion
1113
  }
1114
 
1115
+ def extract_judges(self, soup):
1116
+ judges = []
1117
+ judge_elements = soup.find_all('a', class_='judge-link')
1118
+ if judge_elements:
1119
+ judges = [judge.text.strip() for judge in judge_elements]
1120
+ else:
1121
+ judge_info = soup.find('p', class_='bottom')
1122
+ if judge_info:
1123
+ judges = [j.strip() for j in judge_info.text.split(',') if j.strip()]
1124
+
1125
+ if not judges:
1126
+ self.logger.warning("No judges found in the HTML structure, searching in text content")
1127
+ text_content = soup.get_text()
1128
+ judge_patterns = [
1129
+ r'(?:Judge|Justice)[s]?:?\s*(.*?)\.',
1130
+ r'(?:Before|Authored by):?\s*(.*?)\.',
1131
+ r'(.*?),\s*(?:Circuit Judge|District Judge|Chief Judge)'
1132
+ ]
1133
+ for pattern in judge_patterns:
1134
+ judge_match = re.search(pattern, text_content, re.IGNORECASE)
1135
+ if judge_match:
1136
+ judges = [j.strip() for j in judge_match.group(1).split(',') if j.strip()]
1137
+ break
1138
+
1139
+ return judges if judges else ["Not available"]
1140
+
1141
+ def extract_author(self, soup, judges):
1142
+ author = "Not available"
1143
+ author_elem = soup.find('span', class_='author')
1144
+ if author_elem:
1145
+ author = author_elem.text.strip()
1146
+ elif judges and judges[0] != "Not available":
1147
+ author = judges[0] # Assume the first judge is the author if not explicitly stated
1148
+
1149
+ if author == "Not available":
1150
+ self.logger.warning("No author found in the HTML structure, searching in text content")
1151
+ text_content = soup.get_text()
1152
+ author_patterns = [
1153
+ r'(?:Author|Written by):?\s*(.*?)\.',
1154
+ r'(.*?)\s*delivered the opinion of the court',
1155
+ r'(.*?),\s*(?:Circuit Judge|District Judge|Chief Judge).*?writing for the court'
1156
+ ]
1157
+ for pattern in author_patterns:
1158
+ author_match = re.search(pattern, text_content, re.IGNORECASE)
1159
+ if author_match:
1160
+ author = author_match.group(1).strip()
1161
+ break
1162
+
1163
+ return author
1164
+
1165
+ def extract_court_opinion(self, soup):
1166
+ # Target the article tag with class col-sm-9 first
1167
+ article_div = soup.find('article', class_='col-sm-9')
1168
+ if not article_div:
1169
+ self.logger.error("Could not find the main article div (col-sm-9).")
1170
+ return "Case details not available (main article div not found)."
1171
+
1172
+ # Find the tab-content div within the article div
1173
+ opinion_div = article_div.find('div', class_='tab-content')
1174
+ if not opinion_div:
1175
+ self.logger.error("Could not find the case details content (tab-content div).")
1176
+ return "Case details not available (tab-content div not found)."
1177
+
1178
+ # Extract all text from the tab-content div
1179
+ case_details = opinion_div.get_text(separator='\n', strip=True)
1180
+
1181
+ # Clean up the text
1182
+ case_details = re.sub(r'\n+', '\n', case_details) # Remove multiple newlines
1183
+ case_details = re.sub(r'\s+', ' ', case_details) # Remove extra whitespace
1184
+
1185
+ return case_details
1186
+
1187
  def search_justia(self, query: str) -> Dict[str, Any]:
1188
  """
1189
  Search Justia for case information.
 
1208
  "url": first_result.find('a')['href'],
1209
  }
1210
 
1211
def case_info_retriever():
    """Render the "Case Information Retriever" page.

    Reads a free-text query, calls ``get_case_information`` when the button
    is pressed, and shows either the error it reports or the case details
    together with a plain-text download.

    ``get_case_information`` may return either the CourtListener record or
    the Justia fallback record, and the two do not share all keys. Every
    field is therefore read with a "Not available" default instead of being
    indexed directly, so the fallback result no longer raises ``KeyError``.
    """
    st.subheader("Case Information Retriever")
    query = st.text_input("Enter case name, number, or any relevant information:")
    if not st.button("Retrieve Case Information"):
        return

    # Do not send an empty search to the remote services.
    if not query.strip():
        st.warning("Please enter a case name, number, or other search text.")
        return

    with st.spinner("Retrieving case information..."):
        result = get_case_information(query)

    if "error" in result:
        st.error(result["error"])
        return

    st.success("Case information retrieved successfully!")

    not_available = "Not available"
    case_name = result.get('case_name', not_available)
    date_filed = result.get('date_filed', not_available)
    docket_number = result.get('docket_number', not_available)
    court = result.get('court', not_available)
    status = result.get('status', not_available)
    author = result.get('author', not_available)
    judges_text = ', '.join(result.get('judges') or [not_available])
    citation = result.get('citation')  # only present on the Justia record
    # The Justia record carries a summary rather than the opinion text.
    details = result.get('court_opinion') or result.get('summary') or not_available
    url = result.get('url', '')
    link_label = "View on CourtListener" if "courtlistener.com" in url else "View original source"

    # Display case information
    st.subheader("Case Details")
    col1, col2 = st.columns(2)
    with col1:
        st.write(f"**Case Name:** {case_name}")
        st.write(f"**Date Filed:** {date_filed}")
        st.write(f"**Docket Number:** {docket_number}")
    with col2:
        st.write(f"**Court:** {court}")
        st.write(f"**Status:** {status}")
        if citation:
            st.write(f"**Citation:** {citation}")
        if url:
            st.write(f"**[{link_label}]({url})**")

    # Display judges and author
    st.subheader("Judges and Author")
    st.write(f"**Judges:** {judges_text}")
    st.write(f"**Author:** {author}")

    # Display case details (formerly court opinion)
    st.subheader("Case Details")
    st.markdown(details)

    # Option to download the case information
    download_lines = [
        f"Case Name: {case_name}",
        f"Date Filed: {date_filed}",
        f"Docket Number: {docket_number}",
        f"Court: {court}",
        f"Status: {status}",
        f"Judges: {judges_text}",
        f"Author: {author}",
    ]
    if citation:
        download_lines.append(f"Citation: {citation}")
    download_lines += [
        "",
        "Case Details:",
        details,
        "",
        f"{link_label}: {url}",
        "",
    ]

    st.download_button(
        label="Download Case Information",
        data="\n".join(download_lines),
        file_name="case_information.txt",
        mime="text/plain"
    )
1266
+
1267
def get_case_information(query: str) -> Dict[str, Any]:
    """Look up a case, trying CourtListener first and Justia second.

    Returns the first source's result dict that has no ``"error"`` key.
    Justia is only queried when the CourtListener result reports an error.
    If both report errors, a dict with a single generic ``"error"`` message
    is returned.
    """
    retriever = LegalDataRetriever()

    # Sources in priority order; each is called only if the previous one failed.
    sources = (retriever.search_courtlistener, retriever.search_justia)
    for search in sources:
        info = search(query)
        if "error" not in info:
            return info

    return {"error": "Unable to find case information from available sources."}
1281
 
1282
def extract_text_from_document(uploaded_file):
    """Return the plain text of an uploaded PDF, DOCX or text file.

    Args:
        uploaded_file: Upload object exposing a ``type`` attribute (MIME
            type) and, for non-PDF/DOCX content, ``getvalue()`` returning
            the raw bytes.

    Returns:
        The extracted text. PDF page texts are concatenated as-is; DOCX
        paragraphs are each followed by a newline; any other type is decoded
        as UTF-8, with undecodable bytes replaced by U+FFFD rather than
        raising ``UnicodeDecodeError``.
    """
    file_type = uploaded_file.type

    if file_type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # ``or ""`` guards against a page that yields no text object at all.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    if file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        # Imported locally so this does not depend on a module-level name
        # ``docx`` being bound (the file's visible imports bind ``Document``).
        from docx import Document
        doc = Document(uploaded_file)
        return "".join(para.text + "\n" for para in doc.paragraphs)

    return uploaded_file.getvalue().decode("utf-8", errors="replace")
1295
+
1296
def split_text(text, max_chunk_size=4000):
    """Split ``text`` into consecutive chunks of at most ``max_chunk_size`` characters.

    Chunks are cut purely by character count, so a cut may fall mid-word.
    An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``max_chunk_size`` is not positive. A negative size
            would otherwise silently return ``[]`` and drop all the text.
    """
    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be a positive integer")
    return [
        text[start:start + max_chunk_size]
        for start in range(0, len(text), max_chunk_size)
    ]
1298
+
1299
def generate_legal_brief(case_info):
    """Generate a legal brief for ``case_info`` by prompting the model per chunk.

    ``case_info`` is split with ``split_text``; each chunk is sent in its own
    chat-completion request and the replies are concatenated, each followed
    by a blank line.

    Returns:
        The concatenated brief, or the fixed sentence
        "Unable to generate complete legal brief due to an error." as soon as
        any request fails (the failure is also shown via ``st.error``, and
        the parts generated so far are discarded).
    """
    chunks = split_text(case_info)
    total_parts = len(chunks)
    sections = []

    for part_number, chunk in enumerate(chunks, start=1):
        prompt = f"""Generate a part of a comprehensive legal brief based on the following information. This is part {part_number} of {total_parts}. Focus on:
1. A summary of the facts
2. Identification of key legal issues
3. Relevant laws and precedents
4. Legal analysis
5. Conclusion and recommendations
6. An analysis of why the winning party won
7. A review of how the losing party could have potentially won

Case Information (Part {part_number}/{total_parts}):
{chunk}

Please provide a detailed and thorough response for the relevant sections based on this part of the information."""

        try:
            response = ai71.chat.completions.create(
                model="tiiuae/falcon-180b-chat",
                messages=[{"role": "user", "content": prompt}],
                stream=False,
            )
            # Built inside the try so a malformed reply is handled like a failed call.
            sections.append(response.choices[0].message.content + "\n\n")
        except Exception as e:
            st.error(f"Error generating part {part_number} of the legal brief: {str(e)}")
            return "Unable to generate complete legal brief due to an error."

    return "".join(sections)
1330
+
1331
def automated_legal_brief_generation_ui():
    """Render the "Automated Legal Brief Generation" page.

    Collects case information from a text area or an uploaded PDF/DOCX/TXT
    file, generates a brief with ``generate_legal_brief`` on request, keeps
    the latest successful brief in ``st.session_state.legal_brief`` and
    offers it for download.

    A generation that fails, or that produces no text, is reported as an
    error and clears the stored brief, instead of being stored and announced
    as a success.
    """
    # Must stay identical to the sentence generate_legal_brief returns when a
    # model request fails; it is the only failure signal that function gives.
    failure_message = "Unable to generate complete legal brief due to an error."

    st.title("Automated Legal Brief Generation")

    if 'legal_brief' not in st.session_state:
        st.session_state.legal_brief = ""

    input_method = st.radio("Choose input method:", ("Text Input", "Document Upload"))

    if input_method == "Text Input":
        case_info = st.text_area("Enter the case information:", height=300)
    else:
        uploaded_file = st.file_uploader("Upload a document containing case details (PDF, DOCX, or TXT)", type=["pdf", "docx", "txt"])
        if uploaded_file is not None:
            case_info = extract_text_from_document(uploaded_file)
        else:
            case_info = ""

    if st.button("Generate Legal Brief"):
        # Whitespace-only input carries no case information either.
        if case_info and case_info.strip():
            with st.spinner("Generating comprehensive legal brief..."):
                brief = generate_legal_brief(case_info)
            if brief and brief != failure_message:
                st.session_state.legal_brief = brief
                st.success("Legal brief generated successfully!")
            else:
                # Drop any earlier brief so stale output is not shown for new input.
                st.session_state.legal_brief = ""
                st.error("The legal brief could not be generated. Please try again.")
        else:
            st.warning("Please provide case information to generate the brief.")

    if st.session_state.legal_brief:
        st.subheader("Generated Legal Brief")
        st.text_area("Legal Brief", st.session_state.legal_brief, height=400)

        st.download_button(
            label="Download Legal Brief",
            data=st.session_state.legal_brief,
            file_name="legal_brief.txt",
            mime="text/plain"
        )
1366
  # --- Streamlit App ---
1367
  # Custom CSS to improve the overall look
1368
  st.markdown("""
 
1411
 
1412
  # Sidebar with feature selection
1413
  with st.sidebar:
1414
+ st.title("Lex AI")
1415
  st.subheader("Advanced Legal Assistant")
1416
 
1417
  feature = st.selectbox(
1418
  "Select a feature",
1419
+ ["Legal Chatbot", "Document Analysis", "Case Precedent Finder", "Legal Cost Estimator", "Contract Analysis", "Case Trend Visualizer", "Case Information Retrieval", "Automated Legal Brief Generation"]
1420
  )
1421
  if feature == "Legal Chatbot":
1422
  st.subheader("Legal Chatbot")
 
1579
  elif feature == "Case Information Retrieval":
1580
  case_info_retriever()
1581
 
1582
+ elif feature == "Automated Legal Brief Generation":
1583
+ automated_legal_brief_generation_ui()
1584
  # Add a footer with a disclaimer
1585
  # Footer
1586
  st.markdown("---")