Johan713 committed
Commit b99151b · verified · 1 Parent(s): 26b1e1e

Update app2.py

Files changed (1)
  1. app2.py +247 -96
app2.py CHANGED
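Note on imports: the added code below calls random.choice, re.sub / re.compile, and Selenium's webdriver, Options, WebDriverWait, EC and By, but this diff does not touch the import block. Assuming those names are not already imported elsewhere in app2.py, a minimal sketch of the imports the new code would need:

import random
import re

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait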
@@ -295,62 +295,176 @@ def extract_important_info(text: str) -> str:
295
  prompt = f"Extract and highlight the most important legal information from the following text. Use markdown to emphasize key points:\n\n{text}"
296
  return get_ai_response(prompt)
297
 
298
- def fetch_detailed_content(url: str) -> str:
299
  try:
300
- response = requests.get(url)
301
- response.raise_for_status()
302
- soup = BeautifulSoup(response.text, 'html.parser')
303
-
304
- # Extract main content (this may need to be adjusted based on the structure of the target websites)
305
- main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content')
306
-
307
- if main_content:
308
- # Extract text from paragraphs
309
- paragraphs = main_content.find_all('p')
310
- content = "\n\n".join([p.get_text() for p in paragraphs])
311
 
312
- # Limit content to a reasonable length (e.g., first 1000 characters)
313
- return content[:1000] + "..." if len(content) > 1000 else content
314
- else:
315
- return "Unable to extract detailed content from the webpage."
316
  except Exception as e:
317
- return f"Error fetching detailed content: {str(e)}"
318
 
319
  def query_public_case_law(query: str) -> List[Dict[str, Any]]:
320
- """
321
- Query publicly available case law databases (Justia and CourtListener)
322
- and perform a web search to find related cases.
323
- """
324
  cases = []
325
-
326
- # --- Justia Search (Using BeautifulSoup) ---
327
- justia_url = f"https://law.justia.com/cases/?q={query}"
328
  justia_headers = {
329
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
330
  }
 
331
  try:
332
  justia_response = requests.get(justia_url, headers=justia_headers)
333
  justia_response.raise_for_status()
334
  justia_soup = BeautifulSoup(justia_response.text, 'html.parser')
335
- justia_results = justia_soup.find_all('div', class_='case-listing')
336
-
337
- for result in justia_results[:3]: # Limit Justia results to 3
338
- title = result.find('h6').text.strip()
339
- citation = result.find('p', class_='citation').text.strip()
340
- summary = result.find('p', class_='summary').text.strip()
341
- url = result.find('a')['href']
342
- cases.append({
343
- "source": "Justia",
344
- "case_name": title,
345
- "citation": citation,
346
- "summary": summary,
347
- "url": url
348
- })
349
-
350
  except requests.RequestException as e:
351
  print(f"Error querying Justia: {e}")
352
 
353
- # --- CourtListener Search (Using API) ---
354
  courtlistener_url = f"https://www.courtlistener.com/api/rest/v3/search/?q={query}&type=o&format=json"
355
  for attempt in range(3): # Retry up to 3 times
356
  try:
@@ -361,9 +475,7 @@ def query_public_case_law(query: str) -> List[Dict[str, Any]]:
361
  except (requests.RequestException, ValueError) as e:
362
  print(f"Attempt {attempt + 1} failed: {e}")
363
  if attempt == 2:
364
- print(
365
- f"Failed to retrieve or parse data from CourtListener: {e}"
366
- )
367
  break
368
  time.sleep(2)
369
 
@@ -407,42 +519,61 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
407
  "wikipedia_summary": {"summary": "Error occurred", "url": "", "title": ""}
408
  }
409
 
410
  def find_case_precedents(case_details: str) -> Dict[str, Any]:
411
  """Finds relevant case precedents based on provided details."""
412
  try:
413
- # Initial AI analysis of the case details
414
- analysis_prompt = f"Analyze the following case details and identify key legal concepts and relevant precedents:\n\n{case_details}"
415
- initial_analysis = get_ai_response(analysis_prompt)
416
-
417
  # Query public case law databases
418
  public_cases = query_public_case_law(case_details)
419
-
420
- # Perform web search (existing functionality)
421
  web_results = search_web(f"legal precedent {case_details}", num_results=3)
422
-
423
- # Perform Wikipedia search (existing functionality)
424
  wiki_result = search_wikipedia(f"legal case {case_details}")
425
-
426
  # Compile all information
427
- compilation_prompt = f"""Compile a comprehensive summary of case precedents based on the following information:
 
428
 
429
- Initial Analysis: {initial_analysis}
430
 
431
  Public Case Law Results:
432
- {public_cases}
433
 
434
  Web Search Results:
435
- {web_results}
436
 
437
  Wikipedia Information:
438
  {wiki_result['summary']}
439
 
440
- Provide a well-structured summary highlighting the most relevant precedents and legal principles."""
441
 
442
- final_summary = get_ai_response(compilation_prompt)
443
-
444
  return {
445
- "summary": final_summary,
446
  "public_cases": public_cases,
447
  "web_results": web_results,
448
  "wikipedia": wiki_result
@@ -1544,14 +1675,9 @@ elif feature == "Document Analysis":
1544
  elif feature == "Case Precedent Finder":
1545
  st.subheader("Case Precedent Finder")
1546
 
1547
- # Initialize session state for precedents if not exists
1548
  if 'precedents' not in st.session_state:
1549
  st.session_state.precedents = None
1550
 
1551
- # Initialize session state for visibility toggles if not exists
1552
- if 'visibility_toggles' not in st.session_state:
1553
- st.session_state.visibility_toggles = {}
1554
-
1555
  case_details = st.text_area("Enter case details:")
1556
  if st.button("Find Precedents"):
1557
  with st.spinner("Searching for relevant case precedents..."):
@@ -1560,51 +1686,76 @@ elif feature == "Case Precedent Finder":
1560
  except Exception as e:
1561
  st.error(f"An error occurred while finding case precedents: {str(e)}")
1562
 
1563
- # Display results if precedents are available
1564
  if st.session_state.precedents:
1565
  precedents = st.session_state.precedents
1566
 
1567
  st.write("### Summary of Relevant Case Precedents")
1568
  st.markdown(precedents["summary"])
 
1569
 
1570
  st.write("### Related Cases from Public Databases")
1571
  for i, case in enumerate(precedents["public_cases"], 1):
1572
- st.write(f"**{i}. {case['case_name']} ({case['source']}) - {case.get('citation', '')}**")
1573
- st.write(f"Summary: {case.get('summary', 'Not available')}")
1574
- st.write(f"[Read full case]({case['url']})")
1575
- st.write("---")
1576
 
1577
  st.write("### Additional Web Results")
1578
  for i, result in enumerate(precedents["web_results"], 1):
1579
- st.write(f"**{i}. [{result['title']}]({result['link']})**")
1580
-
1581
- # Create a unique key for each toggle
1582
- toggle_key = f"toggle_{i}"
1583
-
1584
- # Initialize the toggle state if it doesn't exist
1585
- if toggle_key not in st.session_state.visibility_toggles:
1586
- st.session_state.visibility_toggles[toggle_key] = False
1587
-
1588
- # Create a button to toggle visibility
1589
- if st.button(f"{'Hide' if st.session_state.visibility_toggles[toggle_key] else 'Show'} Full Details for Result {i}", key=f"button_{i}"):
1590
- st.session_state.visibility_toggles[toggle_key] = not st.session_state.visibility_toggles[toggle_key]
1591
-
1592
- # Show full details if toggle is True
1593
- if st.session_state.visibility_toggles[toggle_key]:
1594
- # Fetch and display more detailed content
1595
- detailed_content = fetch_detailed_content(result['link'])
1596
- st.markdown(detailed_content)
1597
- else:
1598
- # Show a brief summary when details are hidden
1599
- brief_summary = result['snippet'].split('\n')[0][:200] + "..." if len(result['snippet']) > 200 else result['snippet'].split('\n')[0]
1600
- st.write(f"Brief Summary: {brief_summary}")
1601
-
1602
- st.write("---")
1603
 
1604
  st.write("### Wikipedia Information")
1605
  wiki_info = precedents["wikipedia"]
1606
- st.write(f"**[{wiki_info['title']}]({wiki_info['url']})**")
1607
- st.markdown(wiki_info['summary'])
1608
 
1609
  elif feature == "Legal Cost Estimator":
1610
  legal_cost_estimator_ui()
 
295
  prompt = f"Extract and highlight the most important legal information from the following text. Use markdown to emphasize key points:\n\n{text}"
296
  return get_ai_response(prompt)
297
 
298
+ user_agents = [
299
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
300
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15',
301
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
302
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36'
303
+ ]
304
+
305
+ # Rate limiting parameters
306
+ MIN_DELAY = 3 # Minimum delay between requests in seconds
307
+ MAX_DELAY = 10 # Maximum delay between requests in seconds
308
+ last_request_time = 0
309
+
310
+ def get_random_user_agent():
311
+ return random.choice(user_agents)
312
+
313
+ def rate_limit():
314
+ global last_request_time
315
+ current_time = time.time()
316
+ time_since_last_request = current_time - last_request_time
317
+ if time_since_last_request < MIN_DELAY:
318
+ sleep_time = random.uniform(MIN_DELAY, MAX_DELAY)
319
+ time.sleep(sleep_time)
320
+ last_request_time = time.time()
321
+
322
+ def fetch_detailed_content(url):
323
+ rate_limit()
324
+
325
+ chrome_options = Options()
326
+ chrome_options.add_argument("--headless")
327
+ chrome_options.add_argument("--no-sandbox")
328
+ chrome_options.add_argument("--disable-dev-shm-usage")
329
+ chrome_options.add_argument(f"user-agent={get_random_user_agent()}")
330
+
331
  try:
332
+ with webdriver.Chrome(options=chrome_options) as driver:
333
+ driver.get(url)
334
 
335
+ # Wait for the main content to load
336
+ WebDriverWait(driver, 20).until(
337
+ EC.presence_of_element_located((By.TAG_NAME, "body"))
338
+ )
339
+
340
+ # Scroll to load any lazy-loaded content
341
+ driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
342
+ time.sleep(2) # Wait for any dynamic content to load
343
+
344
+ # Get the page source after JavaScript execution
345
+ page_source = driver.page_source
346
+
347
+ # Use BeautifulSoup for parsing
348
+ soup = BeautifulSoup(page_source, 'html.parser')
349
+
350
+ # Remove script and style elements
351
+ for script in soup(["script", "style"]):
352
+ script.decompose()
353
+
354
+ # Extract main content (customize based on the website structure)
355
+ main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile('content|main'))
356
+
357
+ if not main_content:
358
+ main_content = soup.body
359
+
360
+ # Extract text content
361
+ text_content = main_content.get_text(separator='\n', strip=True)
362
+
363
+ # Clean and process the content
364
+ cleaned_content = clean_content(text_content)
365
+
366
+ return cleaned_content
367
+
368
  except Exception as e:
369
+ print(f"Error fetching content: {e}")
370
+ return f"Unable to fetch detailed content. Error: {str(e)}"
371
+
372
+ def clean_content(text):
373
+ # Remove extra whitespace and newlines
374
+ text = re.sub(r'\s+', ' ', text).strip()
375
+
376
+ # Remove any remaining HTML tags
377
+ text = re.sub(r'<[^>]+>', '', text)
378
+
379
+ # Remove special characters and digits (customize as needed)
380
+ text = re.sub(r'[^a-zA-Z\s.,;:?!-]', '', text)
381
+
382
+ # Split into sentences
383
+ sentences = re.split(r'(?<=[.!?])\s+', text)
384
+
385
+ # Remove short sentences (likely to be noise)
386
+ sentences = [s for s in sentences if len(s.split()) > 3]
387
+
388
+ # Join sentences back together
389
+ cleaned_text = ' '.join(sentences)
390
+
391
+ return cleaned_text
392
+
393
+ def extract_structured_data(soup):
394
+ structured_data = {}
395
+
396
+ # Extract title
397
+ title = soup.find('title')
398
+ if title:
399
+ structured_data['title'] = title.get_text(strip=True)
400
+
401
+ # Extract meta description
402
+ meta_desc = soup.find('meta', attrs={'name': 'description'})
403
+ if meta_desc:
404
+ structured_data['description'] = meta_desc.get('content', '')
405
+
406
+ # Extract headings
407
+ headings = []
408
+ for tag in ['h1', 'h2', 'h3']:
409
+ for heading in soup.find_all(tag):
410
+ headings.append({
411
+ 'level': tag,
412
+ 'text': heading.get_text(strip=True)
413
+ })
414
+ structured_data['headings'] = headings
415
+
416
+ # Extract links
417
+ links = []
418
+ for link in soup.find_all('a', href=True):
419
+ links.append({
420
+ 'text': link.get_text(strip=True),
421
+ 'href': link['href']
422
+ })
423
+ structured_data['links'] = links
424
+
425
+ # Extract images
426
+ images = []
427
+ for img in soup.find_all('img', src=True):
428
+ images.append({
429
+ 'src': img['src'],
430
+ 'alt': img.get('alt', '')
431
+ })
432
+ structured_data['images'] = images
433
+
434
+ return structured_data
435
 
436
  def query_public_case_law(query: str) -> List[Dict[str, Any]]:
437
+ """Query publicly available case law databases (Justia and CourtListener) to find related cases."""
438
  cases = []
439
+
440
+ # Justia Search using Google
441
+ justia_url = f"https://www.google.com/search?q={query}+case+law+site:law.justia.com"
442
  justia_headers = {
443
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
444
  }
445
+
446
  try:
447
  justia_response = requests.get(justia_url, headers=justia_headers)
448
  justia_response.raise_for_status()
449
  justia_soup = BeautifulSoup(justia_response.text, 'html.parser')
450
+ justia_results = justia_soup.find_all('div', class_='g')
451
+
452
+ for result in justia_results[:3]: # Limit to 3 results
453
+ title_elem = result.find('h3', class_='r')
454
+ if title_elem and 'law.justia.com' in title_elem.find('a')['href']:
455
+ title = title_elem.text.strip()
456
+ url = title_elem.find('a')['href']
457
+ snippet = result.find('div', class_='s').text.strip()
458
+ cases.append({
459
+ "source": "Justia",
460
+ "case_name": title,
461
+ "summary": snippet,
462
+ "url": url
463
+ })
 
464
  except requests.RequestException as e:
465
  print(f"Error querying Justia: {e}")
466
 
467
+ # CourtListener Search
468
  courtlistener_url = f"https://www.courtlistener.com/api/rest/v3/search/?q={query}&type=o&format=json"
469
  for attempt in range(3): # Retry up to 3 times
470
  try:
 
475
  except (requests.RequestException, ValueError) as e:
476
  print(f"Attempt {attempt + 1} failed: {e}")
477
  if attempt == 2:
478
+ print(f"Failed to retrieve or parse data from CourtListener: {e}")
479
  break
480
  time.sleep(2)
481
 
 
519
  "wikipedia_summary": {"summary": "Error occurred", "url": "", "title": ""}
520
  }
521
 
522
+ def format_public_cases(cases: List[Dict[str, Any]]) -> str:
523
+ """Format public cases for the AI prompt."""
524
+ formatted = ""
525
+ for case in cases:
526
+ formatted += f"Source: {case['source']}\n"
527
+ formatted += f"Case Name: {case['case_name']}\n"
528
+ if 'citation' in case:
529
+ formatted += f"Citation: {case['citation']}\n"
530
+ if 'summary' in case:
531
+ formatted += f"Summary: {case['summary']}\n"
532
+ if 'date_filed' in case:
533
+ formatted += f"Date Filed: {case['date_filed']}\n"
534
+ if 'docket_number' in case:
535
+ formatted += f"Docket Number: {case['docket_number']}\n"
536
+ if 'court' in case:
537
+ formatted += f"Court: {case['court']}\n"
538
+ formatted += "\n"
539
+ return formatted
540
+
541
  def find_case_precedents(case_details: str) -> Dict[str, Any]:
542
  """Finds relevant case precedents based on provided details."""
543
  try:
544
  # Query public case law databases
545
  public_cases = query_public_case_law(case_details)
546
+
547
+ # Perform web search
548
  web_results = search_web(f"legal precedent {case_details}", num_results=3)
549
+
550
+ # Perform Wikipedia search
551
  wiki_result = search_wikipedia(f"legal case {case_details}")
552
+
553
  # Compile all information
554
+ compilation_prompt = f"""
555
+ Analyze and summarize the following case law information, focusing solely on factual elements and legal principles. Do not include any speculative or fictional content:
556
 
557
+ Case Details: {case_details}
558
 
559
  Public Case Law Results:
560
+ {format_public_cases(public_cases)}
561
 
562
  Web Search Results:
563
+ {format_web_results(web_results)}
564
 
565
  Wikipedia Information:
566
  {wiki_result['summary']}
567
 
568
+ Provide a concise, fact-based summary highlighting the most relevant precedents and legal principles.
569
+ Ensure all information is directly sourced from the provided materials.
570
+ Do not introduce any speculative or hypothetical scenarios.
571
+ """
572
+
573
+ summary = get_ai_response(compilation_prompt)
574
 
575
  return {
576
+ "summary": summary,
577
  "public_cases": public_cases,
578
  "web_results": web_results,
579
  "wikipedia": wiki_result
 
1675
  elif feature == "Case Precedent Finder":
1676
  st.subheader("Case Precedent Finder")
1677
 
 
1678
  if 'precedents' not in st.session_state:
1679
  st.session_state.precedents = None
1680
 
1681
  case_details = st.text_area("Enter case details:")
1682
  if st.button("Find Precedents"):
1683
  with st.spinner("Searching for relevant case precedents..."):
 
1686
  except Exception as e:
1687
  st.error(f"An error occurred while finding case precedents: {str(e)}")
1688
 
 
1689
  if st.session_state.precedents:
1690
  precedents = st.session_state.precedents
1691
 
1692
  st.write("### Summary of Relevant Case Precedents")
1693
  st.markdown(precedents["summary"])
1694
+ st.warning("Note: This summary is based on real case law and does not include fictional elements.")
1695
 
1696
  st.write("### Related Cases from Public Databases")
1697
  for i, case in enumerate(precedents["public_cases"], 1):
1698
+ with st.expander(f"{i}. {case['case_name']}"):
1699
+ if case['source'] == "Justia":
1700
+ st.write(f"**Source:** Justia")
1701
+ st.write(f"**Summary:** {case['summary']}")
1702
+ elif case['source'] == "CourtListener":
1703
+ st.write(f"**Source:** CourtListener")
1704
+ st.write(f"**Date Filed:** {case['date_filed']}")
1705
+ st.write(f"**Docket Number:** {case['docket_number']}")
1706
+ st.write(f"**Court:** {case['court']}")
1707
+ st.write(f"[Read full case]({case['url']})")
1708
+
1709
+ if st.button(f"Fetch Full Content for Case {i}", key=f"fetch_case_button_{i}"):
1710
+ with st.spinner("Fetching detailed content..."):
1711
+ try:
1712
+ detailed_content, structured_data = fetch_detailed_content(case['url'])
1713
+ st.subheader("Cleaned Content")
1714
+ st.markdown(detailed_content)
1715
+
1716
+ st.subheader("Structured Data")
1717
+ st.json(structured_data)
1718
+ except Exception as e:
1719
+ st.error(f"Unable to fetch detailed content: {str(e)}")
1720
+ st.write("Please visit the source link for full information.")
1721
 
1722
  st.write("### Additional Web Results")
1723
  for i, result in enumerate(precedents["web_results"], 1):
1724
+ with st.expander(f"{i}. {result['title']}"):
1725
+ st.write(f"**Source:** [{result['link']}]({result['link']})")
1726
+ st.write(f"**Snippet:** {result['snippet']}")
1727
+
1728
+ if st.button(f"Fetch Full Content for Result {i}", key=f"fetch_result_button_{i}"):
1729
+ with st.spinner("Fetching detailed content..."):
1730
+ try:
1731
+ detailed_content, structured_data = fetch_detailed_content(result['link'])
1732
+ st.subheader("Cleaned Content")
1733
+ st.markdown(detailed_content)
1734
+
1735
+ st.subheader("Structured Data")
1736
+ st.json(structured_data)
1737
+ except Exception as e:
1738
+ st.error(f"Unable to fetch detailed content: {str(e)}")
1739
+ st.write("Please visit the source link for full information.")
1740
 
1741
  st.write("### Wikipedia Information")
1742
  wiki_info = precedents["wikipedia"]
1743
+ with st.expander(f"Wikipedia: {wiki_info['title']}"):
1744
+ st.markdown(wiki_info['summary'])
1745
+ st.write(f"[Read more on Wikipedia]({wiki_info['url']})")
1746
+
1747
+ if st.button("Fetch Full Wikipedia Content", key="fetch_wiki_button"):
1748
+ with st.spinner("Fetching detailed Wikipedia content..."):
1749
+ try:
1750
+ detailed_content, structured_data = fetch_detailed_content(wiki_info['url'])
1751
+ st.subheader("Cleaned Content")
1752
+ st.markdown(detailed_content)
1753
+
1754
+ st.subheader("Structured Data")
1755
+ st.json(structured_data)
1756
+ except Exception as e:
1757
+ st.error(f"Unable to fetch detailed Wikipedia content: {str(e)}")
1758
+ st.write("Please visit the Wikipedia link for full information.")
1759
 
1760
  elif feature == "Legal Cost Estimator":
1761
  legal_cost_estimator_ui()
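A note on the new fetch_detailed_content: as committed it returns a single cleaned string (or an error string), while the UI code added in this same commit unpacks two values (detailed_content, structured_data = fetch_detailed_content(...)), and the new extract_structured_data helper is never called. A minimal sketch of a tuple-returning variant that would satisfy those call sites, reusing the rate_limit, get_random_user_agent, clean_content and extract_structured_data helpers defined above (assumed to be in scope):

import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def fetch_detailed_content(url: str):
    """Sketch: return (cleaned_text, structured_data) so callers can unpack two values."""
    rate_limit()  # helper defined in this commit

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument(f"user-agent={get_random_user_agent()}")  # helper defined in this commit

    try:
        with webdriver.Chrome(options=chrome_options) as driver:
            driver.get(url)
            # Wait for the body, then scroll to trigger any lazy-loaded content
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Drop script/style noise before extracting text
        for tag in soup(["script", "style"]):
            tag.decompose()

        main_content = (soup.find('main') or soup.find('article')
                        or soup.find('div', class_=re.compile('content|main'))
                        or soup.body)
        cleaned = clean_content(main_content.get_text(separator='\n', strip=True))  # helper defined in this commit
        structured = extract_structured_data(soup)  # defined in this commit but otherwise unused
        return cleaned, structured
    except Exception as e:
        # Return a two-tuple on failure as well, so the unpacking in the UI never raises.
        return f"Unable to fetch detailed content. Error: {str(e)}", {}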