raghavNCI
committed on
Commit
·
91637b1
1
Parent(s):
f00f379
search criteria
Browse files
nuse_modules/google_search.py
CHANGED
@@ -6,14 +6,39 @@ import requests
|
|
6 |
# Credentials for the Google Custom Search endpoint, taken from the process
# environment. Either value is None when its variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
GOOGLE_CX_ID = os.environ.get("GOOGLE_CX_ID")
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
def search_google_news(keywords: list[str], num_results: int = 5):
|
10 |
query = " ".join(keywords)
|
|
|
|
|
11 |
url = (
|
12 |
f"https://www.googleapis.com/customsearch/v1"
|
13 |
f"?key={GOOGLE_API_KEY}&cx={GOOGLE_CX_ID}"
|
14 |
f"&q={query}&num={num_results}"
|
15 |
)
|
16 |
-
|
17 |
try:
|
18 |
res = requests.get(url, timeout=10)
|
19 |
res.raise_for_status()
|
@@ -21,11 +46,13 @@ def search_google_news(keywords: list[str], num_results: int = 5):
|
|
21 |
results = []
|
22 |
|
23 |
for item in data.get("items", []):
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
29 |
|
30 |
return results
|
31 |
except Exception as e:
|
|
|
6 |
# Credentials for the Google Custom Search endpoint, taken from the process
# environment. Either value is None when its variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
GOOGLE_CX_ID = os.environ.get("GOOGLE_CX_ID")
|
8 |
|
9 |
+
# Allow-list of news domains treated as trustworthy, keyed by region name.
# The "global" entry is also the fallback list for any region that has no
# entry of its own.
TRUSTED_SOURCES_BY_REGION = {
    "global": [
        "bbc.com",
        "reuters.com",
        "apnews.com",
        "nytimes.com",
    ],
    "india": [
        "thehindu.com",
        "indiatoday.in",
        "timesofindia.indiatimes.com",
        "scroll.in",
    ],
    "china": [
        "scmp.com",
        "chinadaily.com.cn",
    ],
    "middle_east": [
        "aljazeera.com",
        "arabnews.com",
    ],
}
|
16 |
+
|
17 |
+
# Lower-cased trigger term -> region key. Each term maps to exactly one region.
_REGION_BY_TERM = {
    "india": "india",
    "modi": "india",
    "delhi": "india",
    "supreme court": "india",
    "china": "china",
    "beijing": "china",
    "xi jinping": "china",
    "gaza": "middle_east",
    "israel": "middle_east",
    "palestine": "middle_east",
    "jerusalem": "middle_east",
}


def infer_region_from_keywords(keywords: list[str]) -> str:
    """Guess which region a search is about from its keywords.

    Keywords are checked in order and the first one that is a known trigger
    term decides the result. A keyword must equal a trigger term as a whole
    (no partial-word matching); comparison ignores letter case and any
    leading or trailing whitespace.

    Args:
        keywords: Search keywords or phrases, e.g. ``["Modi", "budget"]``.

    Returns:
        ``"india"``, ``"china"`` or ``"middle_east"`` when a trigger term is
        found, otherwise ``"global"``.
    """
    for kw in keywords:
        # Strip before lookup so inputs like " India" or "gaza\n" still match.
        region = _REGION_BY_TERM.get(kw.strip().lower())
        if region is not None:
            return region
    return "global"
|
27 |
+
|
28 |
+
def is_trusted_domain(url: str, region: str) -> bool:
    """Return True when *url* is hosted on a trusted domain for *region*.

    The URL's hostname must equal a trusted domain or be a subdomain of one
    (``www.bbc.com`` matches ``bbc.com``). Only the hostname is compared, so
    a trusted name that merely appears in the path, the query string, or as
    part of a longer unrelated host (``notbbc.com``, ``bbc.com.evil.org``)
    does not count.

    Args:
        url: Link to check. ``None`` or an empty string yields False.
        region: Key into ``TRUSTED_SOURCES_BY_REGION``; unknown regions fall
            back to the ``"global"`` list.

    Returns:
        True if the host is on the region's allow-list, otherwise False.
    """
    # Local import so this function does not depend on the module's import block.
    from urllib.parse import urlparse

    if not url:
        # Search items without a link must not raise and abort the caller's loop.
        return False

    trusted = TRUSTED_SOURCES_BY_REGION.get(region, TRUSTED_SOURCES_BY_REGION["global"])

    try:
        host = urlparse(url).hostname
        if host is None:
            # Scheme-less input such as "bbc.com/news": parse it as a network location.
            host = urlparse("//" + url).hostname
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False

    if not host:
        return False

    # A fully qualified host may carry a trailing dot ("bbc.com.").
    host = host.rstrip(".")
    return any(host == domain or host.endswith("." + domain) for domain in trusted)
|
31 |
+
|
32 |
def search_google_news(keywords: list[str], num_results: int = 5):
|
33 |
query = " ".join(keywords)
|
34 |
+
region = infer_region_from_keywords(keywords)
|
35 |
+
|
36 |
url = (
|
37 |
f"https://www.googleapis.com/customsearch/v1"
|
38 |
f"?key={GOOGLE_API_KEY}&cx={GOOGLE_CX_ID}"
|
39 |
f"&q={query}&num={num_results}"
|
40 |
)
|
41 |
+
|
42 |
try:
|
43 |
res = requests.get(url, timeout=10)
|
44 |
res.raise_for_status()
|
|
|
46 |
results = []
|
47 |
|
48 |
for item in data.get("items", []):
|
49 |
+
link = item.get("link")
|
50 |
+
if is_trusted_domain(link, region):
|
51 |
+
results.append({
|
52 |
+
"title": item.get("title"),
|
53 |
+
"link": link,
|
54 |
+
"snippet": item.get("snippet"),
|
55 |
+
})
|
56 |
|
57 |
return results
|
58 |
except Exception as e:
|