Spaces:

Johan713
/

aAzelll

Sleeping

App Files Files Community

Johan713 committed on Aug 4, 2024

Commit

f47d39c

verified ·

1 Parent(s): 2c22cd3

Update app2.py

Browse files

Files changed (1) hide show

app2.py +52 -34

app2.py CHANGED Viewed

@@ -2,6 +2,9 @@ import streamlit as st
 import pandas as pd
 import plotly.express as px
 import requests
 from ai71 import AI71
 import PyPDF2
 import io
@@ -18,7 +21,6 @@ import matplotlib.pyplot as plt
 from bs4 import BeautifulSoup, NavigableString, Tag
 from io import StringIO
 import wikipedia
-from googleapiclient.discovery import build
 from typing import List, Optional
 from httpx_sse import SSEError
 from difflib import SequenceMatcher
@@ -447,43 +449,59 @@ def safe_find(element, selector, class_=None, attr=None):
         return found.get(attr) if attr else found.text.strip()
     return "Not available"
-def search_web_duckduckgo(query: str, num_results: int = 3) -> List[Dict[str, str]]:
     """
-    Performs a web search using the DuckDuckGo search engine.
     Returns a list of dictionaries containing search result title, link, and snippet.
     """
-    base_url = "https://html.duckduckgo.com/html/"
-    params = {
-        'q': query,
-        's': '0',
-        'dc': '20',
-        'o': 'json',
-        'api': '/d.js'
-    }
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-    }
-    try:
-        response = requests.get(base_url, params=params, headers=headers, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.content, 'html.parser')
-        results = []
-        for result in soup.find_all('div', class_='result')[:num_results]:
-            title = result.find('a', class_='result__a').text.strip()
-            link = result.find('a', class_='result__a')['href']
-            snippet = result.find('a', class_='result__snippet').text.strip()
-            results.append({
-                'title': title,
-                'link': link,
-                'snippet': snippet
-            })
-        return results
-    except requests.RequestException as e:
-        print(f"Error fetching web search results: {e}")
-        return []
 def estimate_legal_costs(case_type: str, complexity: str, state: str) -> Dict[str, Any]:
     """

 import pandas as pd
 import plotly.express as px
 import requests
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+from requests.exceptions import RequestException, ConnectionError, Timeout
 from ai71 import AI71
 import PyPDF2
 import io
 from bs4 import BeautifulSoup, NavigableString, Tag
 from io import StringIO
 import wikipedia
 from typing import List, Optional
 from httpx_sse import SSEError
 from difflib import SequenceMatcher
         return found.get(attr) if attr else found.text.strip()
     return "Not available"
+def search_web_duckduckgo(query: str, num_results: int = 3, max_retries: int = 3) -> List[Dict[str, str]]:
     """
+    Performs a web search using the Google Custom Search API.
     Returns a list of dictionaries containing search result title, link, and snippet.
+
+    The function keeps its historical name so existing callers continue to
+    work, but it no longer talks to DuckDuckGo.
+
+    Credentials are read from the GOOGLE_API_KEY and GOOGLE_CSE_ID environment
+    variables instead of being embedded in the source. Any key that was ever
+    committed to the repository must be treated as leaked and rotated.
+
+    Args:
+        query: The search terms to send to the API.
+        num_results: Maximum number of results to return. The Custom Search
+            JSON API accepts 1-10 results per request, so larger values are
+            capped at 10 and values below 1 yield an empty list.
+        max_retries: Number of attempts before giving up. Failed attempts are
+            separated by an exponential backoff (1s, 2s, 4s, ...).
+
+    Returns:
+        A list of {"title", "link", "snippet"} dictionaries, or an empty list
+        when credentials are missing, nothing matched, or every attempt failed.
     """
+    # Function-scope imports keep this block self-contained regardless of
+    # what the module imports at the top of the file.
+    import os
+    import time
+
+    api_key = os.environ.get("GOOGLE_API_KEY")
+    cse_id = os.environ.get("GOOGLE_CSE_ID")
+    if not api_key or not cse_id:
+        print("Google Custom Search credentials are not configured "
+              "(set GOOGLE_API_KEY and GOOGLE_CSE_ID). No results found.")
+        return []
+
+    if num_results < 1:
+        # The API rejects num < 1; answer directly instead of burning retries.
+        return []
+    # The API rejects num > 10, which previously failed on every attempt.
+    page_size = min(num_results, 10)
+
+    service = None
+    for attempt in range(max_retries):
+        try:
+            # Build the client lazily and only once; if building fails, the
+            # next attempt tries again.
+            if service is None:
+                service = build("customsearch", "v1", developerKey=api_key)
+            # Execute the search request
+            res = service.cse().list(q=query, cx=cse_id, num=page_size).execute()
+            # "items" is absent from the response when nothing matched.
+            return [
+                {
+                    "title": item["title"],
+                    "link": item["link"],
+                    "snippet": item.get("snippet", ""),
+                }
+                for item in res.get("items", [])[:page_size]
+            ]
+        except HttpError as e:
+            print(f"HTTP error occurred: {e}. Attempt {attempt + 1} of {max_retries}")
+        except ConnectionError as e:
+            print(f"Connection error occurred: {e}. Attempt {attempt + 1} of {max_retries}")
+        except Timeout as e:
+            print(f"Timeout error occurred: {e}. Attempt {attempt + 1} of {max_retries}")
+        except RequestException as e:
+            print(f"An error occurred during the request: {e}. Attempt {attempt + 1} of {max_retries}")
+        except Exception as e:
+            # Deliberate best-effort boundary: a failed search must not crash the app.
+            print(f"An unexpected error occurred: {e}. Attempt {attempt + 1} of {max_retries}")
+        # Exponential backoff, skipped after the final attempt because there
+        # is nothing left to wait for.
+        if attempt < max_retries - 1:
+            time.sleep(2 ** attempt)
+    print("Max retries reached. No results found.")
+    return []
 def estimate_legal_costs(case_type: str, complexity: str, state: str) -> Dict[str, Any]:
     """