Spaces:

Johan713
/

aAzelll

Sleeping

App Files Files Community

Johan713 committed on Aug 1, 2024

Commit

de53a8b

verified ·

1 Parent(s): 448e443

Update app2.py

Browse files

Files changed (1) hide show

app2.py +275 -42

app2.py CHANGED Viewed

@@ -11,10 +11,11 @@ from docx import Document
 from docx.shared import Inches
 from datetime import datetime
 import re
 import base64
 from typing import List, Dict, Any
 import matplotlib.pyplot as plt
-from bs4 import BeautifulSoup
 from io import StringIO
 import wikipedia
 from googleapiclient.discovery import build
@@ -22,6 +23,18 @@ from typing import List, Optional
 from httpx_sse import SSEError
 from difflib import SequenceMatcher
 from datetime import datetime
 # Error handling for optional dependencies
 try:
@@ -1035,37 +1048,142 @@ def get_trend_description(df):
     else:
         return "The number of cases has remained relatively stable over the five-year period."
 class LegalDataRetriever:
     def __init__(self):
         self.session = requests.Session()
         self.session.headers.update({
-            'User-Agent': 'LegalResearchBot/1.0 (https://www.lexai.com/bot; support@lexai.com)'
         })
     def search_courtlistener(self, query: str) -> Dict[str, Any]:
         """
         Search CourtListener for case information.
         """
         url = f"https://www.courtlistener.com/api/rest/v3/search/?q={query}&type=o&format=json"
-        response = self.session.get(url)
-        if response.status_code != 200:
-            return {"error": "Failed to retrieve data from CourtListener"}
-        data = response.json()
         if data['count'] == 0:
             return {"error": "No results found"}
         result = data['results'][0]
         return {
             "case_name": result['caseName'],
             "date_filed": result['dateFiled'],
-            "docket_number": result['docketNumber'],
             "court": result['court'],
-            "status": result['status'],
-            "url": f"https://www.courtlistener.com{result['absolute_url']}",
         }
     def search_justia(self, query: str) -> Dict[str, Any]:
         """
         Search Justia for case information.
@@ -1090,48 +1208,161 @@ class LegalDataRetriever:
             "url": first_result.find('a')['href'],
         }
-def get_case_information(query: str) -> str:
     retriever = LegalDataRetriever()
-    # Determine if the query is a case number
-    case_number_pattern = r'\d{1,2}:\d{2}-[a-zA-Z]{2,4}-\d{1,5}'
-    is_case_number = re.match(case_number_pattern, query)
     # Search CourtListener
     cl_info = retriever.search_courtlistener(query)
     if "error" not in cl_info:
-        return f"""
-Case Information from CourtListener:
-Case Name: {cl_info['case_name']}
-Date Filed: {cl_info['date_filed']}
-Docket Number: {cl_info['docket_number']}
-Court: {cl_info['court']}
-Status: {cl_info['status']}
-More Info: {cl_info['url']}
-            """
-    # Search Justia
     justia_info = retriever.search_justia(query)
     if "error" not in justia_info:
-        return f"""
-Case Information from Justia:
-Case Name: {justia_info['case_name']}
-Citation: {justia_info['citation']}
-Summary: {justia_info['summary']}
-More Info: {justia_info['url']}
-            """
-    return "Unable to find case information from available sources."
-def case_info_retriever():
-    st.subheader("Case Information Retriever")
-    query = st.text_input("Enter case name, number, or any relevant information:")
-    if st.button("Retrieve Case Information"):
-        with st.spinner("Retrieving case information..."):
-            result = get_case_information(query)
-        st.markdown(result)
 # --- Streamlit App ---
 # Custom CSS to improve the overall look
 st.markdown("""
@@ -1180,12 +1411,12 @@ st.title("Lex AI - Advanced Legal Assistant")
 # Sidebar with feature selection
 with st.sidebar:
-    st.title(" AI")
     st.subheader("Advanced Legal Assistant")
     feature = st.selectbox(
         "Select a feature",
-        ["Legal Chatbot", "Document Analysis", "Case Precedent Finder", "Legal Cost Estimator", "Contract Analysis", "Case Trend Visualizer", "Case Information Retrieval"]
     )
 if feature == "Legal Chatbot":
     st.subheader("Legal Chatbot")
@@ -1348,6 +1579,8 @@ elif feature == "Case Trend Visualizer":
 elif feature == "Case Information Retrieval":
     case_info_retriever()
 # Add a footer with a disclaimer
 # Footer
 st.markdown("---")

 from docx.shared import Inches
 from datetime import datetime
 import re
+import logging
 import base64
 from typing import List, Dict, Any
 import matplotlib.pyplot as plt
+from bs4 import BeautifulSoup, NavigableString, Tag
 from io import StringIO
 import wikipedia
 from googleapiclient.discovery import build
 from httpx_sse import SSEError
 from difflib import SequenceMatcher
 from datetime import datetime
+import spacy
+import time
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import networkx as nx
+nlp = spacy.load("en_core_web_sm")
 # Error handling for optional dependencies
 try:
     else:
         return "The number of cases has remained relatively stable over the five-year period."
+from bs4 import BeautifulSoup, NavigableString, Tag
+import requests
+import time
+import logging
+import re
 class LegalDataRetriever:
     def __init__(self):
         self.session = requests.Session()
         self.session.headers.update({
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
         })
+        logging.basicConfig(level=logging.DEBUG)
+        self.logger = logging.getLogger(__name__)
     def search_courtlistener(self, query: str) -> Dict[str, Any]:
         """
         Search CourtListener for case information.
         """
         url = f"https://www.courtlistener.com/api/rest/v3/search/?q={query}&type=o&format=json"
+        for attempt in range(3):  # Retry up to 3 times
+            try:
+                response = self.session.get(url)
+                response.raise_for_status()
+                data = response.json()
+                break
+            except (requests.RequestException, ValueError) as e:
+                self.logger.error(f"Attempt {attempt + 1} failed: {e}")
+                if attempt == 2:
+                    return {"error": f"Failed to retrieve or parse data from CourtListener: {e}"}
+                time.sleep(2)  # Wait before retrying
         if data['count'] == 0:
             return {"error": "No results found"}
         result = data['results'][0]
+        case_url = f"https://www.courtlistener.com{result['absolute_url']}"
+        try:
+            case_response = self.session.get(case_url)
+            case_response.raise_for_status()
+            soup = BeautifulSoup(case_response.text, 'html.parser')
+        except requests.RequestException as e:
+            self.logger.error(f"Failed to retrieve case page: {e}")
+            return {"error": f"Failed to retrieve case page: {e}"}
+        judges = self.extract_judges(soup)
+        author = self.extract_author(soup, judges)
+        court_opinion = self.extract_court_opinion(soup)
         return {
             "case_name": result['caseName'],
             "date_filed": result['dateFiled'],
+            "docket_number": result.get('docketNumber', 'Not available'),
             "court": result['court'],
+            "status": result.get('status', 'Not available'),
+            "url": case_url,
+            "judges": judges,
+            "author": author,
+            "court_opinion": court_opinion
         }
+    def extract_judges(self, soup):
+        judges = []
+        judge_elements = soup.find_all('a', class_='judge-link')
+        if judge_elements:
+            judges = [judge.text.strip() for judge in judge_elements]
+        else:
+            judge_info = soup.find('p', class_='bottom')
+            if judge_info:
+                judges = [j.strip() for j in judge_info.text.split(',') if j.strip()]
+        if not judges:
+            self.logger.warning("No judges found in the HTML structure, searching in text content")
+            text_content = soup.get_text()
+            judge_patterns = [
+                r'(?:Judge|Justice)[s]?:?\s*(.*?)\.',
+                r'(?:Before|Authored by):?\s*(.*?)\.',
+                r'(.*?),\s*(?:Circuit Judge|District Judge|Chief Judge)'
+            ]
+            for pattern in judge_patterns:
+                judge_match = re.search(pattern, text_content, re.IGNORECASE)
+                if judge_match:
+                    judges = [j.strip() for j in judge_match.group(1).split(',') if j.strip()]
+                    break
+        return judges if judges else ["Not available"]
+    def extract_author(self, soup, judges):
+        author = "Not available"
+        author_elem = soup.find('span', class_='author')
+        if author_elem:
+            author = author_elem.text.strip()
+        elif judges and judges[0] != "Not available":
+            author = judges[0]  # Assume the first judge is the author if not explicitly stated
+        if author == "Not available":
+            self.logger.warning("No author found in the HTML structure, searching in text content")
+            text_content = soup.get_text()
+            author_patterns = [
+                r'(?:Author|Written by):?\s*(.*?)\.',
+                r'(.*?)\s*delivered the opinion of the court',
+                r'(.*?),\s*(?:Circuit Judge|District Judge|Chief Judge).*?writing for the court'
+            ]
+            for pattern in author_patterns:
+                author_match = re.search(pattern, text_content, re.IGNORECASE)
+                if author_match:
+                    author = author_match.group(1).strip()
+                    break
+        return author
+    def extract_court_opinion(self, soup):
+        # Target the article tag with class col-sm-9 first
+        article_div = soup.find('article', class_='col-sm-9')
+        if not article_div:
+            self.logger.error("Could not find the main article div (col-sm-9).")
+            return "Case details not available (main article div not found)."
+        # Find the tab-content div within the article div
+        opinion_div = article_div.find('div', class_='tab-content')
+        if not opinion_div:
+            self.logger.error("Could not find the case details content (tab-content div).")
+            return "Case details not available (tab-content div not found)."
+        # Extract all text from the tab-content div
+        case_details = opinion_div.get_text(separator='\n', strip=True)
+        # Clean up the text
+        case_details = re.sub(r'\n+', '\n', case_details)  # Remove multiple newlines
+        case_details = re.sub(r'\s+', ' ', case_details)  # Remove extra whitespace
+        return case_details
     def search_justia(self, query: str) -> Dict[str, Any]:
         """
         Search Justia for case information.
             "url": first_result.find('a')['href'],
         }
def case_info_retriever():
    """
    Render the "Case Information Retriever" page.

    Reads a query from a text input and, when the button is pressed, looks the
    case up with ``get_case_information``. Errors are shown with ``st.error``;
    otherwise the case fields, judges, author and case details are displayed
    and offered as a plain-text download.

    Fields missing from the result are shown as "Not available" rather than
    raising ``KeyError``: the Justia fallback result carries only
    ``case_name``, ``citation``, ``summary`` and ``url``.
    """
    not_available = "Not available"

    st.subheader("Case Information Retriever")
    query = st.text_input("Enter case name, number, or any relevant information:")
    if not st.button("Retrieve Case Information"):
        return

    # Do not send an empty search to the remote services.
    if not query or not query.strip():
        st.warning("Please enter a case name, number, or other search terms.")
        return

    with st.spinner("Retrieving case information..."):
        result = get_case_information(query)
    if "error" in result:
        st.error(result["error"])
        return

    st.success("Case information retrieved successfully!")

    case_name = result.get('case_name', not_available)
    date_filed = result.get('date_filed', not_available)
    docket_number = result.get('docket_number', not_available)
    court = result.get('court', not_available)
    status = result.get('status', not_available)
    author = result.get('author', not_available)
    url = result.get('url', '')

    judges = result.get('judges') or [not_available]
    if isinstance(judges, str):
        judges = [judges]
    judges_text = ', '.join(str(judge) for judge in judges)

    # Results without an opinion text (Justia) fall back to their summary.
    case_details = result.get('court_opinion') or result.get('summary') or not_available
    # Only call the link a CourtListener link when it actually points there.
    link_label = "View on CourtListener" if "courtlistener.com" in url else "View source"

    # Display case information
    st.subheader("Case Details")
    col1, col2 = st.columns(2)
    with col1:
        st.write(f"**Case Name:** {case_name}")
        st.write(f"**Date Filed:** {date_filed}")
        st.write(f"**Docket Number:** {docket_number}")
    with col2:
        st.write(f"**Court:** {court}")
        st.write(f"**Status:** {status}")
        if url:
            st.write(f"**[{link_label}]({url})**")

    # Display judges and author
    st.subheader("Judges and Author")
    st.write(f"**Judges:** {judges_text}")
    st.write(f"**Author:** {author}")

    # Display case details (formerly court opinion)
    st.subheader("Case Details")
    st.markdown(case_details)

    # Option to download the case information. The text is assembled line by
    # line so the file does not inherit this source file's indentation.
    case_info_text = "\n".join([
        f"Case Name: {case_name}",
        f"Date Filed: {date_filed}",
        f"Docket Number: {docket_number}",
        f"Court: {court}",
        f"Status: {status}",
        f"Judges: {judges_text}",
        f"Author: {author}",
        "Case Details:",
        f"{case_details}",
        f"{link_label}: {url}",
    ]) + "\n"
    st.download_button(
        label="Download Case Information",
        data=case_info_text,
        file_name="case_information.txt",
        mime="text/plain"
    )
def get_case_information(query: str) -> Dict[str, Any]:
    """
    Look a case up in the available sources and return the first usable hit.

    CourtListener is asked first; Justia is only queried when CourtListener's
    answer contains an ``error`` key. When both answers contain ``error``, a
    dict with a generic ``error`` message is returned.
    """
    retriever = LegalDataRetriever()
    # Sources in priority order; each one is called only if the previous failed.
    for search in (retriever.search_courtlistener, retriever.search_justia):
        info = search(query)
        if "error" not in info:
            return info
    return {"error": "Unable to find case information from available sources."}
def extract_text_from_document(uploaded_file):
    """
    Return the text content of an uploaded PDF, DOCX or plain-text file.

    Args:
        uploaded_file: Uploaded file object exposing a MIME type in ``.type``
            and, for the plain-text path, its raw bytes via ``.getvalue()``.

    Returns:
        The extracted text. PDF pages are concatenated as-is, DOCX paragraphs
        are each followed by a newline, and any other file type is decoded as
        UTF-8 with undecodable bytes replaced by U+FFFD.
    """
    file_type = uploaded_file.type

    if file_type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # Guard against extract_text() handing back None or "" for a page, so
        # the concatenation cannot raise TypeError.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    if file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        # Imported here so this branch does not depend on a module-level
        # `docx` name; the visible file-level import is `from docx import Document`.
        from docx import Document

        document = Document(uploaded_file)
        return "".join(para.text + "\n" for para in document.paragraphs)

    # Plain text: do not crash on files that are not valid UTF-8.
    return uploaded_file.getvalue().decode("utf-8", errors="replace")
def split_text(text, max_chunk_size=4000):
    """
    Split ``text`` into consecutive chunks of at most ``max_chunk_size`` characters.

    Args:
        text: The string to split.
        max_chunk_size: Maximum length of each chunk; must be positive.

    Returns:
        The chunks in order; an empty list for empty ``text``. Only the last
        chunk may be shorter than ``max_chunk_size``.

    Raises:
        ValueError: If ``max_chunk_size`` is zero or negative. A negative size
            would otherwise silently produce no chunks and drop the text.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")
    return [
        text[start:start + max_chunk_size]
        for start in range(0, len(text), max_chunk_size)
    ]
def generate_legal_brief(case_info):
    """
    Generate a legal brief for ``case_info`` by prompting the chat model once per chunk.

    The input is cut into chunks with ``split_text``; each chunk is sent to
    ``ai71.chat.completions.create`` (model ``tiiuae/falcon-180b-chat``) and
    the replies are concatenated, each followed by a blank line.

    Returns:
        The concatenated brief. If any model call fails, the error is shown
        with ``st.error`` and a fixed failure message is returned instead.
    """
    chunks = split_text(case_info)
    total_parts = len(chunks)
    sections = []
    for part_no, chunk in enumerate(chunks, start=1):
        prompt = f"""Generate a part of a comprehensive legal brief based on the following information. This is part {part_no} of {total_parts}. Focus on:
        1. A summary of the facts
        2. Identification of key legal issues
        3. Relevant laws and precedents
        4. Legal analysis
        5. Conclusion and recommendations
        6. An analysis of why the winning party won
        7. A review of how the losing party could have potentially won
        Case Information (Part {part_no}/{total_parts}):
        {chunk}
        Please provide a detailed and thorough response for the relevant sections based on this part of the information."""
        try:
            response = ai71.chat.completions.create(
                model="tiiuae/falcon-180b-chat",
                messages=[{"role": "user", "content": prompt}],
                stream=False,
            )
            sections.append(response.choices[0].message.content + "\n\n")
        except Exception as e:
            # Any failure aborts the whole brief; partial output is discarded.
            st.error(f"Error generating part {part_no} of the legal brief: {str(e)}")
            return "Unable to generate complete legal brief due to an error."
    return "".join(sections)
def automated_legal_brief_generation_ui():
    """
    Render the "Automated Legal Brief Generation" page.

    The user supplies case information either as typed text or as an uploaded
    PDF/DOCX/TXT document. Pressing the button generates a brief with
    ``generate_legal_brief`` and stores it in ``st.session_state.legal_brief``
    so it is still there on later reruns (for example after the download
    button is clicked). A stored brief is shown in a text area and offered as
    a download.
    """
    # Message generate_legal_brief() returns in place of a brief when a model
    # call fails (it reports the error itself via st.error).
    generation_failed = "Unable to generate complete legal brief due to an error."

    st.title("Automated Legal Brief Generation")
    if 'legal_brief' not in st.session_state:
        st.session_state.legal_brief = ""

    input_method = st.radio("Choose input method:", ("Text Input", "Document Upload"))
    case_info = ""
    if input_method == "Text Input":
        case_info = st.text_area("Enter the case information:", height=300)
    else:
        uploaded_file = st.file_uploader("Upload a document containing case details (PDF, DOCX, or TXT)", type=["pdf", "docx", "txt"])
        if uploaded_file is not None:
            case_info = extract_text_from_document(uploaded_file)

    if st.button("Generate Legal Brief"):
        # Whitespace-only input carries no case information either.
        if case_info and case_info.strip():
            with st.spinner("Generating comprehensive legal brief..."):
                brief = generate_legal_brief(case_info)
            # Only report success and store the result when a brief was
            # actually produced; a failed run leaves any earlier brief as is.
            if brief != generation_failed:
                st.session_state.legal_brief = brief
                st.success("Legal brief generated successfully!")
        else:
            st.warning("Please provide case information to generate the brief.")

    if st.session_state.legal_brief:
        st.subheader("Generated Legal Brief")
        st.text_area("Legal Brief", st.session_state.legal_brief, height=400)
        st.download_button(
            label="Download Legal Brief",
            data=st.session_state.legal_brief,
            file_name="legal_brief.txt",
            mime="text/plain"
        )
 # --- Streamlit App ---
 # Custom CSS to improve the overall look
 st.markdown("""
 # Sidebar with feature selection
 with st.sidebar:
+    st.title("Lex AI")
     st.subheader("Advanced Legal Assistant")
     feature = st.selectbox(
         "Select a feature",
+        ["Legal Chatbot", "Document Analysis", "Case Precedent Finder", "Legal Cost Estimator", "Contract Analysis", "Case Trend Visualizer", "Case Information Retrieval", "Automated Legal Brief Generation"]
     )
 if feature == "Legal Chatbot":
     st.subheader("Legal Chatbot")
 elif feature == "Case Information Retrieval":
     case_info_retriever()
+elif feature == "Automated Legal Brief Generation":
+    automated_legal_brief_generation_ui()
 # Add a footer with a disclaimer
 # Footer
 st.markdown("---")