File size: 2,753 Bytes
f3d9e94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import requests
from bs4 import BeautifulSoup
import json

# Substrings that disqualify a search result: parse_results drops any result
# whose link contains one of these.
excluded_urls = ["finance.yahoo.com", "google.com/finance"]


def search_duckduckgo(keywords, timeout=10):
    """Fetch the DuckDuckGo HTML results page for a keyword query.

    Args:
        keywords: Iterable of search terms. A single string is also accepted
            and is used as the whole query rather than being split into
            characters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text, or an empty string when the request fails
        (the error is printed).
    """
    if isinstance(keywords, str):
        keywords = [keywords]
    query = " ".join(keywords)

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    try:
        # Passing the query through ``params`` lets requests percent-encode
        # it, so characters such as "&", "#" or "+" inside a keyword can no
        # longer corrupt the URL. Spaces are still sent as "+".
        response = requests.get(
            "https://duckduckgo.com/html/",
            params={"q": query},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching search results: {e}")
        return ""

    return response.text


def _resolve_redirect(href):
    """Return the target of a DuckDuckGo redirect link, or href unchanged."""
    from urllib.parse import parse_qs, urlparse

    try:
        parsed = urlparse(href)
    except ValueError:
        # Malformed URL: leave it as found rather than dropping the result.
        return href

    if parsed.netloc.endswith("duckduckgo.com") and parsed.path.startswith("/l/"):
        # parse_qs percent-decodes the value, yielding the real destination.
        targets = parse_qs(parsed.query).get("uddg")
        if targets:
            return targets[0]
    return href


def parse_results(html, keywords):
    """Extract the relevant search results from a DuckDuckGo HTML page.

    A result is kept when its link contains none of the ``excluded_urls``
    substrings and at least one keyword occurs (case-insensitively) in its
    title, description, or link. Each kept result is also printed.

    Links wrapped in a DuckDuckGo redirect (``/l/?uddg=<encoded url>``) are
    unwrapped first, so exclusions that contain "/" are compared against the
    real URL instead of its percent-encoded form.

    Args:
        html: Markup of the results page.
        keywords: Iterable of search terms; a single string is treated as one
            term.

    Returns:
        A list of dicts with the keys "Link", "Title" and "Description".
    """
    if isinstance(keywords, str):
        keywords = [keywords]
    # Lowercase once up front instead of once per result and per field.
    lowered_keywords = [keyword.lower() for keyword in keywords]

    soup = BeautifulSoup(html, "html.parser")
    parsed_results = []

    for result in soup.select(".result"):
        anchor = result.select_one(".result__a")
        href = anchor.get("href") if anchor is not None else None
        if not href:
            # Entries without a usable link (ads, placeholders) are skipped
            # explicitly rather than via a caught AttributeError/TypeError.
            print("Error parsing result: missing link")
            continue

        link = _resolve_redirect(href)
        snippet = result.select_one(".result__snippet")
        result_data = {
            "Link": link,
            "Title": anchor.text,
            "Description": snippet.text if snippet is not None else "",
        }

        if any(excluded_url in link for excluded_url in excluded_urls):
            continue

        searchable = [value.lower() for value in result_data.values()]
        if any(
            keyword in text for keyword in lowered_keywords for text in searchable
        ):
            print(result_data)
            parsed_results.append(result_data)

    return parsed_results


# keywords = ["tatasteel", "finance", "news"]


def perform_search(keywords, output_path="results.json"):
    """Search DuckDuckGo for the keywords and save matching results as JSON.

    Args:
        keywords: Search terms, passed to both search_duckduckgo and
            parse_results.
        output_path: File the results are written to. Defaults to
            "results.json" in the current working directory.

    Returns:
        None. The file is written only when at least one result matches;
        otherwise a message is printed and any existing file is left as is.
    """
    html = search_duckduckgo(keywords)
    if not html:
        print("Failed to fetch search results.")
        return

    results = parse_results(html, keywords)
    if not results:
        print("No results found.")
        return

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=4)


# if __name__ == "__main__":
#     perform_search(keywords=keywords)