Johan713 committed
Commit b99151b · verified · 1 Parent(s): 26b1e1e

Update app2.py

Files changed (1)
  1. app2.py +247 -96
app2.py CHANGED
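Note on imports: the added code below calls random.choice, re.sub / re.compile, and Selenium's webdriver, Options, WebDriverWait, EC and By, but this diff does not touch the import block. Assuming those names are not already imported elsewhere in app2.py, a minimal sketch of the imports the new code would need:

import random
import re

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait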
@@ -295,62 +295,176 @@ def extract_important_info(text: str) -> str:
295
  prompt = f"Extract and highlight the most important legal information from the following text. Use markdown to emphasize key points:\n\n{text}"
296
  return get_ai_response(prompt)
297
 
298
- def fetch_detailed_content(url: str) -> str:
299
  try:
300
- response = requests.get(url)
301
- response.raise_for_status()
302
- soup = BeautifulSoup(response.text, 'html.parser')
303
-
304
- # Extract main content (this may need to be adjusted based on the structure of the target websites)
305
- main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content')
306
-
307
- if main_content:
308
- # Extract text from paragraphs
309
- paragraphs = main_content.find_all('p')
310
- content = "\n\n".join([p.get_text() for p in paragraphs])
311
 
312
- # Limit content to a reasonable length (e.g., first 1000 characters)
313
- return content[:1000] + "..." if len(content) > 1000 else content
314
- else:
315
- return "Unable to extract detailed content from the webpage."
316
  except Exception as e:
317
- return f"Error fetching detailed content: {str(e)}"
318
 
319
  def query_public_case_law(query: str) -> List[Dict[str, Any]]:
320
- """
321
- Query publicly available case law databases (Justia and CourtListener)
322
- and perform a web search to find related cases.
323
- """
324
  cases = []
325
-
326
- # --- Justia Search (Using BeautifulSoup) ---
327
- justia_url = f"https://law.justia.com/cases/?q={query}"
328
  justia_headers = {
329
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
330
  }
 
331
  try:
332
  justia_response = requests.get(justia_url, headers=justia_headers)
333
  justia_response.raise_for_status()
334
  justia_soup = BeautifulSoup(justia_response.text, 'html.parser')
335
- justia_results = justia_soup.find_all('div', class_='case-listing')
336
-
337
- for result in justia_results[:3]: # Limit Justia results to 3
338
- title = result.find('h6').text.strip()
339
- citation = result.find('p', class_='citation').text.strip()
340
- summary = result.find('p', class_='summary').text.strip()
341
- url = result.find('a')['href']
342
- cases.append({
343
- "source": "Justia",
344
- "case_name": title,
345
- "citation": citation,
346
- "summary": summary,
347
- "url": url
348
- })
349
-
350
  except requests.RequestException as e:
351
  print(f"Error querying Justia: {e}")
352
 
353
- # --- CourtListener Search (Using API) ---
354
  courtlistener_url = f"https://www.courtlistener.com/api/rest/v3/search/?q={query}&type=o&format=json"
355
  for attempt in range(3): # Retry up to 3 times
356
  try:
@@ -361,9 +475,7 @@ def query_public_case_law(query: str) -> List[Dict[str, Any]]:
361
  except (requests.RequestException, ValueError) as e:
362
  print(f"Attempt {attempt + 1} failed: {e}")
363
  if attempt == 2:
364
- print(
365
- f"Failed to retrieve or parse data from CourtListener: {e}"
366
- )
367
  break
368
  time.sleep(2)
369
 
@@ -407,42 +519,61 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
407
  "wikipedia_summary": {"summary": "Error occurred", "url": "", "title": ""}
408
  }
409
 
410
  def find_case_precedents(case_details: str) -> Dict[str, Any]:
411
  """Finds relevant case precedents based on provided details."""
412
  try:
413
- # Initial AI analysis of the case details
414
- analysis_prompt = f"Analyze the following case details and identify key legal concepts and relevant precedents:\n\n{case_details}"
415
- initial_analysis = get_ai_response(analysis_prompt)
416
-
417
  # Query public case law databases
418
  public_cases = query_public_case_law(case_details)
419
-
420
- # Perform web search (existing functionality)
421
  web_results = search_web(f"legal precedent {case_details}", num_results=3)
422
-
423
- # Perform Wikipedia search (existing functionality)
424
  wiki_result = search_wikipedia(f"legal case {case_details}")
425
-
426
  # Compile all information
427
- compilation_prompt = f"""Compile a comprehensive summary of case precedents based on the following information:
 
428
 
429
- Initial Analysis: {initial_analysis}
430
 
431
  Public Case Law Results:
432
- {public_cases}
433
 
434
  Web Search Results:
435
- {web_results}
436
 
437
  Wikipedia Information:
438
  {wiki_result['summary']}
439
 
440
- Provide a well-structured summary highlighting the most relevant precedents and legal principles."""
441
 
442
- final_summary = get_ai_response(compilation_prompt)
443
-
444
  return {
445
- "summary": final_summary,
446
  "public_cases": public_cases,
447
  "web_results": web_results,
448
  "wikipedia": wiki_result
@@ -1544,14 +1675,9 @@ elif feature == "Document Analysis":
1544
  elif feature == "Case Precedent Finder":
1545
  st.subheader("Case Precedent Finder")
1546
 
1547
- # Initialize session state for precedents if not exists
1548
  if 'precedents' not in st.session_state:
1549
  st.session_state.precedents = None
1550
 
1551
- # Initialize session state for visibility toggles if not exists
1552
- if 'visibility_toggles' not in st.session_state:
1553
- st.session_state.visibility_toggles = {}
1554
-
1555
  case_details = st.text_area("Enter case details:")
1556
  if st.button("Find Precedents"):
1557
  with st.spinner("Searching for relevant case precedents..."):
@@ -1560,51 +1686,76 @@ elif feature == "Case Precedent Finder":
1560
  except Exception as e:
1561
  st.error(f"An error occurred while finding case precedents: {str(e)}")
1562
 
1563
- # Display results if precedents are available
1564
  if st.session_state.precedents:
1565
  precedents = st.session_state.precedents
1566
 
1567
  st.write("### Summary of Relevant Case Precedents")
1568
  st.markdown(precedents["summary"])
 
1569
 
1570
  st.write("### Related Cases from Public Databases")
1571
  for i, case in enumerate(precedents["public_cases"], 1):
1572
- st.write(f"**{i}. {case['case_name']} ({case['source']}) - {case.get('citation', '')}**")
1573
- st.write(f"Summary: {case.get('summary', 'Not available')}")
1574
- st.write(f"[Read full case]({case['url']})")
1575
- st.write("---")
1576
 
1577
  st.write("### Additional Web Results")
1578
  for i, result in enumerate(precedents["web_results"], 1):
1579
- st.write(f"**{i}. [{result['title']}]({result['link']})**")
1580
-
1581
- # Create a unique key for each toggle
1582
- toggle_key = f"toggle_{i}"
1583
-
1584
- # Initialize the toggle state if it doesn't exist
1585
- if toggle_key not in st.session_state.visibility_toggles:
1586
- st.session_state.visibility_toggles[toggle_key] = False
1587
-
1588
- # Create a button to toggle visibility
1589
- if st.button(f"{'Hide' if st.session_state.visibility_toggles[toggle_key] else 'Show'} Full Details for Result {i}", key=f"button_{i}"):
1590
- st.session_state.visibility_toggles[toggle_key] = not st.session_state.visibility_toggles[toggle_key]
1591
-
1592
- # Show full details if toggle is True
1593
- if st.session_state.visibility_toggles[toggle_key]:
1594
- # Fetch and display more detailed content
1595
- detailed_content = fetch_detailed_content(result['link'])
1596
- st.markdown(detailed_content)
1597
- else:
1598
- # Show a brief summary when details are hidden
1599
- brief_summary = result['snippet'].split('\n')[0][:200] + "..." if len(result['snippet']) > 200 else result['snippet'].split('\n')[0]
1600
- st.write(f"Brief Summary: {brief_summary}")
1601
-
1602
- st.write("---")
1603
 
1604
  st.write("### Wikipedia Information")
1605
  wiki_info = precedents["wikipedia"]
1606
- st.write(f"**[{wiki_info['title']}]({wiki_info['url']})**")
1607
- st.markdown(wiki_info['summary'])
1608
 
1609
  elif feature == "Legal Cost Estimator":
1610
  legal_cost_estimator_ui()
 
295
  prompt = f"Extract and highlight the most important legal information from the following text. Use markdown to emphasize key points:\n\n{text}"
296
  return get_ai_response(prompt)
297
 
298
+ user_agents = [
299
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
300
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15',
301
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
302
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36'
303
+ ]
304
+
305
+ # Rate limiting parameters
306
+ MIN_DELAY = 3 # Minimum delay between requests in seconds
307
+ MAX_DELAY = 10 # Maximum delay between requests in seconds
308
+ last_request_time = 0
309
+
310
+ def get_random_user_agent():
311
+ return random.choice(user_agents)
312
+
313
+ def rate_limit():
314
+ global last_request_time
315
+ current_time = time.time()
316
+ time_since_last_request = current_time - last_request_time
317
+ if time_since_last_request < MIN_DELAY:
318
+ sleep_time = random.uniform(MIN_DELAY, MAX_DELAY)
319
+ time.sleep(sleep_time)
320
+ last_request_time = time.time()
321
+
322
+ def fetch_detailed_content(url):
323
+ rate_limit()
324
+
325
+ chrome_options = Options()
326
+ chrome_options.add_argument("--headless")
327
+ chrome_options.add_argument("--no-sandbox")
328
+ chrome_options.add_argument("--disable-dev-shm-usage")
329
+ chrome_options.add_argument(f"user-agent={get_random_user_agent()}")
330
+
331
  try:
332
+ with webdriver.Chrome(options=chrome_options) as driver:
333
+ driver.get(url)
334
 
335
+ # Wait for the main content to load
336
+ WebDriverWait(driver, 20).until(
337
+ EC.presence_of_element_located((By.TAG_NAME, "body"))
338
+ )
339
+
340
+ # Scroll to load any lazy-loaded content
341
+ driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
342
+ time.sleep(2) # Wait for any dynamic content to load
343
+
344
+ # Get the page source after JavaScript execution
345
+ page_source = driver.page_source
346
+
347
+ # Use BeautifulSoup for parsing
348
+ soup = BeautifulSoup(page_source, 'html.parser')
349
+
350
+ # Remove script and style elements
351
+ for script in soup(["script", "style"]):
352
+ script.decompose()
353
+
354
+ # Extract main content (customize based on the website structure)
355
+ main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile('content|main'))
356
+
357
+ if not main_content:
358
+ main_content = soup.body
359
+
360
+ # Extract text content
361
+ text_content = main_content.get_text(separator='\n', strip=True)
362
+
363
+ # Clean and process the content
364
+ cleaned_content = clean_content(text_content)
365
+
366
+ return cleaned_content
367
+
368
  except Exception as e:
369
+ print(f"Error fetching content: {e}")
370
+ return f"Unable to fetch detailed content. Error: {str(e)}"
371
+
372
+ def clean_content(text):
373
+ # Remove extra whitespace and newlines
374
+ text = re.sub(r'\s+', ' ', text).strip()
375
+
376
+ # Remove any remaining HTML tags
377
+ text = re.sub(r'<[^>]+>', '', text)
378
+
379
+ # Remove special characters and digits (customize as needed)
380
+ text = re.sub(r'[^a-zA-Z\s.,;:?!-]', '', text)
381
+
382
+ # Split into sentences
383
+ sentences = re.split(r'(?<=[.!?])\s+', text)
384
+
385
+ # Remove short sentences (likely to be noise)
386
+ sentences = [s for s in sentences if len(s.split()) > 3]
387
+
388
+ # Join sentences back together
389
+ cleaned_text = ' '.join(sentences)
390
+
391
+ return cleaned_text
392
+
393
+ def extract_structured_data(soup):
394
+ structured_data = {}
395
+
396
+ # Extract title
397
+ title = soup.find('title')
398
+ if title:
399
+ structured_data['title'] = title.get_text(strip=True)
400
+
401
+ # Extract meta description
402
+ meta_desc = soup.find('meta', attrs={'name': 'description'})
403
+ if meta_desc:
404
+ structured_data['description'] = meta_desc.get('content', '')
405
+
406
+ # Extract headings
407
+ headings = []
408
+ for tag in ['h1', 'h2', 'h3']:
409
+ for heading in soup.find_all(tag):
410
+ headings.append({
411
+ 'level': tag,
412
+ 'text': heading.get_text(strip=True)
413
+ })
414
+ structured_data['headings'] = headings
415
+
416
+ # Extract links
417
+ links = []
418
+ for link in soup.find_all('a', href=True):
419
+ links.append({
420
+ 'text': link.get_text(strip=True),
421
+ 'href': link['href']
422
+ })
423
+ structured_data['links'] = links
424
+
425
+ # Extract images
426
+ images = []
427
+ for img in soup.find_all('img', src=True):
428
+ images.append({
429
+ 'src': img['src'],
430
+ 'alt': img.get('alt', '')
431
+ })
432
+ structured_data['images'] = images
433
+
434
+ return structured_data
435
 
436
  def query_public_case_law(query: str) -> List[Dict[str, Any]]:
437
+ """Query publicly available case law databases (Justia and CourtListener) to find related cases."""
438
  cases = []
439
+
440
+ # Justia Search using Google
441
+ justia_url = f"https://www.google.com/search?q={query}+case+law+site:law.justia.com"
442
  justia_headers = {
443
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
444
  }
445
+
446
  try:
447
  justia_response = requests.get(justia_url, headers=justia_headers)
448
  justia_response.raise_for_status()
449
  justia_soup = BeautifulSoup(justia_response.text, 'html.parser')
450
+ justia_results = justia_soup.find_all('div', class_='g')
451
+
452
+ for result in justia_results[:3]: # Limit to 3 results
453
+ title_elem = result.find('h3', class_='r')
454
+ if title_elem and 'law.justia.com' in title_elem.find('a')['href']:
455
+ title = title_elem.text.strip()
456
+ url = title_elem.find('a')['href']
457
+ snippet = result.find('div', class_='s').text.strip()
458
+ cases.append({
459
+ "source": "Justia",
460
+ "case_name": title,
461
+ "summary": snippet,
462
+ "url": url
463
+ })
 
464
  except requests.RequestException as e:
465
  print(f"Error querying Justia: {e}")
466
 
467
+ # CourtListener Search
468
  courtlistener_url = f"https://www.courtlistener.com/api/rest/v3/search/?q={query}&type=o&format=json"
469
  for attempt in range(3): # Retry up to 3 times
470
  try:
 
475
  except (requests.RequestException, ValueError) as e:
476
  print(f"Attempt {attempt + 1} failed: {e}")
477
  if attempt == 2:
478
+ print(f"Failed to retrieve or parse data from CourtListener: {e}")
479
  break
480
  time.sleep(2)
481
 
 
519
  "wikipedia_summary": {"summary": "Error occurred", "url": "", "title": ""}
520
  }
521
 
522
+ def format_public_cases(cases: List[Dict[str, Any]]) -> str:
523
+ """Format public cases for the AI prompt."""
524
+ formatted = ""
525
+ for case in cases:
526
+ formatted += f"Source: {case['source']}\n"
527
+ formatted += f"Case Name: {case['case_name']}\n"
528
+ if 'citation' in case:
529
+ formatted += f"Citation: {case['citation']}\n"
530
+ if 'summary' in case:
531
+ formatted += f"Summary: {case['summary']}\n"
532
+ if 'date_filed' in case:
533
+ formatted += f"Date Filed: {case['date_filed']}\n"
534
+ if 'docket_number' in case:
535
+ formatted += f"Docket Number: {case['docket_number']}\n"
536
+ if 'court' in case:
537
+ formatted += f"Court: {case['court']}\n"
538
+ formatted += "\n"
539
+ return formatted
540
+
541
  def find_case_precedents(case_details: str) -> Dict[str, Any]:
542
  """Finds relevant case precedents based on provided details."""
543
  try:
544
  # Query public case law databases
545
  public_cases = query_public_case_law(case_details)
546
+
547
+ # Perform web search
548
  web_results = search_web(f"legal precedent {case_details}", num_results=3)
549
+
550
+ # Perform Wikipedia search
551
  wiki_result = search_wikipedia(f"legal case {case_details}")
552
+
553
  # Compile all information
554
+ compilation_prompt = f"""
555
+ Analyze and summarize the following case law information, focusing solely on factual elements and legal principles. Do not include any speculative or fictional content:
556
 
557
+ Case Details: {case_details}
558
 
559
  Public Case Law Results:
560
+ {format_public_cases(public_cases)}
561
 
562
  Web Search Results:
563
+ {format_web_results(web_results)}
564
 
565
  Wikipedia Information:
566
  {wiki_result['summary']}
567
 
568
+ Provide a concise, fact-based summary highlighting the most relevant precedents and legal principles.
569
+ Ensure all information is directly sourced from the provided materials.
570
+ Do not introduce any speculative or hypothetical scenarios.
571
+ """
572
+
573
+ summary = get_ai_response(compilation_prompt)
574
 
575
  return {
576
+ "summary": summary,
577
  "public_cases": public_cases,
578
  "web_results": web_results,
579
  "wikipedia": wiki_result
 
1675
  elif feature == "Case Precedent Finder":
1676
  st.subheader("Case Precedent Finder")
1677
 
 
1678
  if 'precedents' not in st.session_state:
1679
  st.session_state.precedents = None
1680
 
1681
  case_details = st.text_area("Enter case details:")
1682
  if st.button("Find Precedents"):
1683
  with st.spinner("Searching for relevant case precedents..."):
 
1686
  except Exception as e:
1687
  st.error(f"An error occurred while finding case precedents: {str(e)}")
1688
 
 
1689
  if st.session_state.precedents:
1690
  precedents = st.session_state.precedents
1691
 
1692
  st.write("### Summary of Relevant Case Precedents")
1693
  st.markdown(precedents["summary"])
1694
+ st.warning("Note: This summary is based on real case law and does not include fictional elements.")
1695
 
1696
  st.write("### Related Cases from Public Databases")
1697
  for i, case in enumerate(precedents["public_cases"], 1):
1698
+ with st.expander(f"{i}. {case['case_name']}"):
1699
+ if case['source'] == "Justia":
1700
+ st.write(f"**Source:** Justia")
1701
+ st.write(f"**Summary:** {case['summary']}")
1702
+ elif case['source'] == "CourtListener":
1703
+ st.write(f"**Source:** CourtListener")
1704
+ st.write(f"**Date Filed:** {case['date_filed']}")
1705
+ st.write(f"**Docket Number:** {case['docket_number']}")
1706
+ st.write(f"**Court:** {case['court']}")
1707
+ st.write(f"[Read full case]({case['url']})")
1708
+
1709
+ if st.button(f"Fetch Full Content for Case {i}", key=f"fetch_case_button_{i}"):
1710
+ with st.spinner("Fetching detailed content..."):
1711
+ try:
1712
+ detailed_content, structured_data = fetch_detailed_content(case['url'])
1713
+ st.subheader("Cleaned Content")
1714
+ st.markdown(detailed_content)
1715
+
1716
+ st.subheader("Structured Data")
1717
+ st.json(structured_data)
1718
+ except Exception as e:
1719
+ st.error(f"Unable to fetch detailed content: {str(e)}")
1720
+ st.write("Please visit the source link for full information.")
1721
 
1722
  st.write("### Additional Web Results")
1723
  for i, result in enumerate(precedents["web_results"], 1):
1724
+ with st.expander(f"{i}. {result['title']}"):
1725
+ st.write(f"**Source:** [{result['link']}]({result['link']})")
1726
+ st.write(f"**Snippet:** {result['snippet']}")
1727
+
1728
+ if st.button(f"Fetch Full Content for Result {i}", key=f"fetch_result_button_{i}"):
1729
+ with st.spinner("Fetching detailed content..."):
1730
+ try:
1731
+ detailed_content, structured_data = fetch_detailed_content(result['link'])
1732
+ st.subheader("Cleaned Content")
1733
+ st.markdown(detailed_content)
1734
+
1735
+ st.subheader("Structured Data")
1736
+ st.json(structured_data)
1737
+ except Exception as e:
1738
+ st.error(f"Unable to fetch detailed content: {str(e)}")
1739
+ st.write("Please visit the source link for full information.")
1740
 
1741
  st.write("### Wikipedia Information")
1742
  wiki_info = precedents["wikipedia"]
1743
+ with st.expander(f"Wikipedia: {wiki_info['title']}"):
1744
+ st.markdown(wiki_info['summary'])
1745
+ st.write(f"[Read more on Wikipedia]({wiki_info['url']})")
1746
+
1747
+ if st.button("Fetch Full Wikipedia Content", key="fetch_wiki_button"):
1748
+ with st.spinner("Fetching detailed Wikipedia content..."):
1749
+ try:
1750
+ detailed_content, structured_data = fetch_detailed_content(wiki_info['url'])
1751
+ st.subheader("Cleaned Content")
1752
+ st.markdown(detailed_content)
1753
+
1754
+ st.subheader("Structured Data")
1755
+ st.json(structured_data)
1756
+ except Exception as e:
1757
+ st.error(f"Unable to fetch detailed Wikipedia content: {str(e)}")
1758
+ st.write("Please visit the Wikipedia link for full information.")
1759
 
1760
  elif feature == "Legal Cost Estimator":
1761
  legal_cost_estimator_ui()
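A note on the new fetch_detailed_content: as committed it returns a single cleaned string (or an error string), while the UI code added in this same commit unpacks two values (detailed_content, structured_data = fetch_detailed_content(...)), and the new extract_structured_data helper is never called. A minimal sketch of a tuple-returning variant that would satisfy those call sites, reusing the rate_limit, get_random_user_agent, clean_content and extract_structured_data helpers defined above (assumed to be in scope):

import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def fetch_detailed_content(url: str):
    """Sketch: return (cleaned_text, structured_data) so callers can unpack two values."""
    rate_limit()  # helper defined in this commit

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument(f"user-agent={get_random_user_agent()}")  # helper defined in this commit

    try:
        with webdriver.Chrome(options=chrome_options) as driver:
            driver.get(url)
            # Wait for the body, then scroll to trigger any lazy-loaded content
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Drop script/style noise before extracting text
        for tag in soup(["script", "style"]):
            tag.decompose()

        main_content = (soup.find('main') or soup.find('article')
                        or soup.find('div', class_=re.compile('content|main'))
                        or soup.body)
        cleaned = clean_content(main_content.get_text(separator='\n', strip=True))  # helper defined in this commit
        structured = extract_structured_data(soup)  # defined in this commit but otherwise unused
        return cleaned, structured
    except Exception as e:
        # Return a two-tuple on failure as well, so the unpacking in the UI never raises.
        return f"Unable to fetch detailed content. Error: {str(e)}", {}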