Spaces:
Sleeping
Sleeping
import json
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
# Domains to drop from search results (aggregator pages we don't want to surface).
excluded_urls = ["finance.yahoo.com", "google.com/finance"]
def search_duckduckgo(keywords):
    """Fetch the DuckDuckGo HTML results page for the given search terms.

    Parameters
    ----------
    keywords : iterable of str
        Search terms; they are URL-encoded and joined with ``+``.

    Returns
    -------
    str
        Raw HTML of the results page, or ``""`` on any request failure.
    """
    # quote_plus protects terms containing spaces or special characters,
    # which would otherwise corrupt the query string.
    query = "+".join(quote_plus(keyword) for keyword in keywords)
    url = f"https://duckduckgo.com/html/?q={query}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # timeout keeps the call from hanging forever on a stalled server.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching search results: {e}")
        return ""
def parse_results(html, keywords):
    """Extract matching search results from a DuckDuckGo HTML page.

    Parameters
    ----------
    html : str
        Raw HTML of a DuckDuckGo results page.
    keywords : iterable of str
        Terms matched case-insensitively against title, snippet, or link.

    Returns
    -------
    list of dict
        One ``{"Link", "Title", "Description"}`` dict per result that matches
        at least one keyword and is not on an excluded domain. Matching
        results are also printed as they are found.
    """
    soup = BeautifulSoup(html, "html.parser")
    # Lowercase the keywords once instead of on every result.
    lowered_keywords = [keyword.lower() for keyword in keywords]
    parsed_results = []
    for result in soup.select(".result"):
        try:
            # Select the anchor once; title and href come from the same node.
            anchor = result.select_one(".result__a")
            if anchor is None:
                # Ad/placeholder nodes carry no result link — skip explicitly
                # instead of letting an AttributeError reach the except below.
                continue
            link = anchor.get("href")
            title = anchor.text
            snippet = result.select_one(".result__snippet")
            description = snippet.text if snippet else ""
            result_data = {
                "Link": link,
                "Title": title,
                "Description": description,
            }
            # Drop results hosted on excluded domains.
            if any(excluded_url in link for excluded_url in excluded_urls):
                continue
            # Keep the result if any keyword appears in title, snippet, or URL.
            if any(
                keyword in title.lower()
                or keyword in description.lower()
                or keyword in link.lower()
                for keyword in lowered_keywords
            ):
                print(result_data)
                parsed_results.append(result_data)
        except Exception as e:
            # Keep scraping the remaining results even if one is malformed.
            print(f"Error parsing result: {e}")
    return parsed_results
# keywords = ["tatasteel", "finance", "news"] | |
def perform_search(keywords, output_path="results.json"):
    """Run a DuckDuckGo search and persist matching results as JSON.

    Parameters
    ----------
    keywords : iterable of str
        Search terms passed to :func:`search_duckduckgo` and used for
        filtering in :func:`parse_results`.
    output_path : str, optional
        File the results are written to (default ``"results.json"``,
        preserving the original behavior).

    Returns
    -------
    None
        Progress and failures are reported via ``print``.
    """
    html = search_duckduckgo(keywords)
    if html:
        results = parse_results(html, keywords)
        if results:
            with open(output_path, "w", encoding="utf-8") as f:
                # ensure_ascii=False keeps non-ASCII titles readable on disk.
                json.dump(results, f, ensure_ascii=False, indent=4)
        else:
            print("No results found.")
    else:
        print("Failed to fetch search results.")
# if __name__ == "__main__": | |
# perform_search(keywords = keywords) | |