llamameta committed on
Commit
49c1c40
·
verified ·
1 Parent(s): 115c411

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -3
app.py CHANGED
@@ -541,12 +541,22 @@ async def website_summarizer(url: str, proxy: Optional[str] = None):
541
  @app.get("/api/ask_website")
542
  async def ask_website(url: str, question: str, model: str = "llama-3-70b", proxy: Optional[str] = None):
543
  """
544
- Asks a question about the content of a given website.
545
  """
546
  try:
547
- # Extract text from the given URL
 
 
 
 
 
 
 
 
548
  proxies = {'http': proxy, 'https': proxy} if proxy else None
549
- response = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"}, proxies=proxies)
 
 
550
  response.raise_for_status()
551
  visible_text = extract_text_from_webpage(response.text)
552
  if len(visible_text) > 7500: # Adjust max_chars based on your needs
 
541
  @app.get("/api/ask_website")
542
  async def ask_website(url: str, question: str, model: str = "llama-3-70b", proxy: Optional[str] = None):
543
  """
544
+ Asks a question about the content of a given website using Lynx user agent for better compatibility.
545
  """
546
  try:
547
+ # Generate random Lynx user agent
548
+ def get_useragent():
549
+ lynx_version = f"Lynx/{random.randint(2, 3)}.{random.randint(8, 9)}.{random.randint(0, 2)}"
550
+ libwww_version = f"libwww-FM/{random.randint(2, 3)}.{random.randint(13, 15)}"
551
+ ssl_mm_version = f"SSL-MM/{random.randint(1, 2)}.{random.randint(3, 5)}"
552
+ openssl_version = f"OpenSSL/{random.randint(1, 3)}.{random.randint(0, 4)}.{random.randint(0, 9)}"
553
+ return f"{lynx_version} {libwww_version} {ssl_mm_version} {openssl_version}"
554
+
555
+ # Extract text from the given URL using Lynx user agent
556
  proxies = {'http': proxy, 'https': proxy} if proxy else None
557
+ headers = {"User-Agent": get_useragent()}
558
+
559
+ response = requests.get(url, headers=headers, proxies=proxies)
560
  response.raise_for_status()
561
  visible_text = extract_text_from_webpage(response.text)
562
  if len(visible_text) > 7500: # Adjust max_chars based on your needs