Johan713 committed on
Commit
f47d39c
·
verified ·
1 Parent(s): 2c22cd3

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +52 -34
app2.py CHANGED
@@ -2,6 +2,9 @@ import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
4
  import requests
 
 
 
5
  from ai71 import AI71
6
  import PyPDF2
7
  import io
@@ -18,7 +21,6 @@ import matplotlib.pyplot as plt
18
  from bs4 import BeautifulSoup, NavigableString, Tag
19
  from io import StringIO
20
  import wikipedia
21
- from googleapiclient.discovery import build
22
  from typing import List, Optional
23
  from httpx_sse import SSEError
24
  from difflib import SequenceMatcher
@@ -447,43 +449,59 @@ def safe_find(element, selector, class_=None, attr=None):
447
  return found.get(attr) if attr else found.text.strip()
448
  return "Not available"
449
 
450
def search_web_duckduckgo(query: str, num_results: int = 3) -> List[Dict[str, str]]:
    """
    Performs a web search using the DuckDuckGo search engine.
    Returns a list of dictionaries containing search result title, link, and snippet.

    Args:
        query: Search terms to submit.
        num_results: Maximum number of results to return.

    Returns:
        Up to ``num_results`` dictionaries with ``title``, ``link`` and
        ``snippet`` keys. An empty list is returned for a blank query, a
        non-positive ``num_results``, or when the HTTP request fails.
    """
    # Nothing to search for, or nothing requested: skip the network round trip.
    if not query or not query.strip() or num_results <= 0:
        return []

    base_url = "https://html.duckduckgo.com/html/"
    params = {
        'q': query,
        's': '0',
        'dc': '20',
        'o': 'json',
        'api': '/d.js',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Only the network call sits inside the try, so parsing problems are not
    # mistaken for transport errors.
    try:
        response = requests.get(base_url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching web search results: {e}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    results = []
    for result in soup.find_all('div', class_='result'):
        title_anchor = result.find('a', class_='result__a')
        # find() returns None when the anchor is absent; skip such entries
        # instead of raising AttributeError/KeyError on them.
        if title_anchor is None or not title_anchor.get('href'):
            continue

        snippet_anchor = result.find('a', class_='result__snippet')
        results.append({
            'title': title_anchor.text.strip(),
            'link': title_anchor['href'],
            'snippet': snippet_anchor.text.strip() if snippet_anchor is not None else '',
        })

        # Count only usable results towards the requested limit.
        if len(results) >= num_results:
            break

    return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
 
488
  def estimate_legal_costs(case_type: str, complexity: str, state: str) -> Dict[str, Any]:
489
  """
 
2
  import pandas as pd
3
  import plotly.express as px
4
  import requests
5
+ from googleapiclient.discovery import build
6
+ from googleapiclient.errors import HttpError
7
+ from requests.exceptions import RequestException, ConnectionError, Timeout
8
  from ai71 import AI71
9
  import PyPDF2
10
  import io
 
21
  from bs4 import BeautifulSoup, NavigableString, Tag
22
  from io import StringIO
23
  import wikipedia
 
24
  from typing import List, Optional
25
  from httpx_sse import SSEError
26
  from difflib import SequenceMatcher
 
449
  return found.get(attr) if attr else found.text.strip()
450
  return "Not available"
451
 
452
def search_web_duckduckgo(query: str, num_results: int = 3, max_retries: int = 3) -> List[Dict[str, str]]:
    """
    Performs a web search using the Google Custom Search API.
    Returns a list of dictionaries containing search result title, link, and snippet.

    The ``search_web_duckduckgo`` name is kept so existing callers keep
    working, even though the backend is Google Custom Search.

    Credentials are read from the ``GOOGLE_API_KEY`` and ``GOOGLE_CSE_ID``
    environment variables; they must not be committed to source control.

    Args:
        query: Search terms to submit.
        num_results: Maximum number of results to return. A single API call
            can return at most 10, so larger values are capped at 10.
        max_retries: Number of attempts before giving up.

    Returns:
        Up to ``num_results`` dictionaries with ``title``, ``link`` and
        ``snippet`` keys. An empty list is returned for a blank query, a
        non-positive ``num_results``, missing credentials, or when every
        attempt fails.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import time

    # Nothing to search for, or nothing requested: skip the API call.
    if not query or not query.strip() or num_results <= 0:
        return []

    api_key = os.environ.get("GOOGLE_API_KEY")
    cse_id = os.environ.get("GOOGLE_CSE_ID")
    if not api_key or not cse_id:
        print("Google Custom Search credentials are not configured. No results found.")
        return []

    # The API only accepts num in 1..10; a larger value fails on every attempt.
    page_size = min(num_results, 10)

    for attempt in range(max_retries):
        try:
            service = build("customsearch", "v1", developerKey=api_key)

            # Execute the search request
            res = service.cse().list(q=query, cx=cse_id, num=page_size).execute()
        except HttpError as e:
            print(f"HTTP error occurred: {e}. Attempt {attempt + 1} of {max_retries}")
        except Exception as e:
            # Best-effort search: any other failure is reported and retried
            # rather than propagated to the caller.
            print(f"An unexpected error occurred: {e}. Attempt {attempt + 1} of {max_retries}")
        else:
            results = []
            for item in res.get("items", []):
                link = item.get("link")
                # An item without a link is unusable; skip it rather than
                # losing the whole response to a KeyError.
                if not link:
                    continue
                results.append({
                    "title": item.get("title", ""),
                    "link": link,
                    "snippet": item.get("snippet", ""),
                })
                if len(results) >= num_results:
                    break
            return results

        # Exponential backoff, skipped after the final attempt because there
        # is nothing left to wait for.
        if attempt < max_retries - 1:
            time.sleep(2 ** attempt)

    print("Max retries reached. No results found.")
    return []
505
 
506
  def estimate_legal_costs(case_type: str, complexity: str, state: str) -> Dict[str, Any]:
507
  """