raghavNCI
committed on
Commit
·
8121f99
1
Parent(s):
af46212
roll back
Browse files
nuse_modules/google_search.py
CHANGED
@@ -6,39 +6,14 @@ import requests
|
|
6 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
7 |
GOOGLE_CX_ID = os.getenv("GOOGLE_CX_ID")
|
8 |
|
9 |
-
# Map of trusted domains per region
|
10 |
-
TRUSTED_SOURCES_BY_REGION = {
|
11 |
-
"global": ["bbc.com", "reuters.com", "apnews.com", "nytimes.com"],
|
12 |
-
"india": ["thehindu.com", "indiatoday.in", "timesofindia.indiatimes.com", "scroll.in"],
|
13 |
-
"china": ["scmp.com", "chinadaily.com.cn"],
|
14 |
-
"middle_east": ["aljazeera.com", "arabnews.com"]
|
15 |
-
}
|
16 |
-
|
17 |
-
def infer_region_from_keywords(keywords: list[str]) -> str:
|
18 |
-
for kw in keywords:
|
19 |
-
k = kw.lower()
|
20 |
-
if k in {"india", "modi", "delhi", "supreme court"}:
|
21 |
-
return "india"
|
22 |
-
elif k in {"china", "beijing", "xi jinping"}:
|
23 |
-
return "china"
|
24 |
-
elif k in {"gaza", "israel", "palestine", "jerusalem"}:
|
25 |
-
return "middle_east"
|
26 |
-
return "global"
|
27 |
-
|
28 |
-
def is_trusted_domain(url: str, region: str) -> bool:
|
29 |
-
trusted = TRUSTED_SOURCES_BY_REGION.get(region, TRUSTED_SOURCES_BY_REGION["global"])
|
30 |
-
return any(domain in url for domain in trusted)
|
31 |
-
|
32 |
def search_google_news(keywords: list[str], num_results: int = 5):
|
33 |
query = " ".join(keywords)
|
34 |
-
region = infer_region_from_keywords(keywords)
|
35 |
-
|
36 |
url = (
|
37 |
f"https://www.googleapis.com/customsearch/v1"
|
38 |
f"?key={GOOGLE_API_KEY}&cx={GOOGLE_CX_ID}"
|
39 |
f"&q={query}&num={num_results}"
|
40 |
)
|
41 |
-
|
42 |
try:
|
43 |
res = requests.get(url, timeout=10)
|
44 |
res.raise_for_status()
|
@@ -46,13 +21,11 @@ def search_google_news(keywords: list[str], num_results: int = 5):
|
|
46 |
results = []
|
47 |
|
48 |
for item in data.get("items", []):
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
"snippet": item.get("snippet"),
|
55 |
-
})
|
56 |
|
57 |
return results
|
58 |
except Exception as e:
|
|
|
6 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
7 |
GOOGLE_CX_ID = os.getenv("GOOGLE_CX_ID")
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
def search_google_news(keywords: list[str], num_results: int = 5):
|
10 |
query = " ".join(keywords)
|
|
|
|
|
11 |
url = (
|
12 |
f"https://www.googleapis.com/customsearch/v1"
|
13 |
f"?key={GOOGLE_API_KEY}&cx={GOOGLE_CX_ID}"
|
14 |
f"&q={query}&num={num_results}"
|
15 |
)
|
16 |
+
|
17 |
try:
|
18 |
res = requests.get(url, timeout=10)
|
19 |
res.raise_for_status()
|
|
|
21 |
results = []
|
22 |
|
23 |
for item in data.get("items", []):
|
24 |
+
results.append({
|
25 |
+
"title": item.get("title"),
|
26 |
+
"link": item.get("link"),
|
27 |
+
"snippet": item.get("snippet"),
|
28 |
+
})
|
|
|
|
|
29 |
|
30 |
return results
|
31 |
except Exception as e:
|