raghavNCI committed on
Commit
8121f99
·
1 Parent(s): af46212
Files changed (1) hide show
  1. nuse_modules/google_search.py +6 -33
nuse_modules/google_search.py CHANGED
@@ -6,39 +6,14 @@ import requests
6
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
7
  GOOGLE_CX_ID = os.getenv("GOOGLE_CX_ID")
8
 
9
- # Map of trusted domains per region
10
- TRUSTED_SOURCES_BY_REGION = {
11
- "global": ["bbc.com", "reuters.com", "apnews.com", "nytimes.com"],
12
- "india": ["thehindu.com", "indiatoday.in", "timesofindia.indiatimes.com", "scroll.in"],
13
- "china": ["scmp.com", "chinadaily.com.cn"],
14
- "middle_east": ["aljazeera.com", "arabnews.com"]
15
- }
16
-
17
- def infer_region_from_keywords(keywords: list[str]) -> str:
18
- for kw in keywords:
19
- k = kw.lower()
20
- if k in {"india", "modi", "delhi", "supreme court"}:
21
- return "india"
22
- elif k in {"china", "beijing", "xi jinping"}:
23
- return "china"
24
- elif k in {"gaza", "israel", "palestine", "jerusalem"}:
25
- return "middle_east"
26
- return "global"
27
-
28
- def is_trusted_domain(url: str, region: str) -> bool:
29
- trusted = TRUSTED_SOURCES_BY_REGION.get(region, TRUSTED_SOURCES_BY_REGION["global"])
30
- return any(domain in url for domain in trusted)
31
-
32
  def search_google_news(keywords: list[str], num_results: int = 5):
33
  query = " ".join(keywords)
34
- region = infer_region_from_keywords(keywords)
35
-
36
  url = (
37
  f"https://www.googleapis.com/customsearch/v1"
38
  f"?key={GOOGLE_API_KEY}&cx={GOOGLE_CX_ID}"
39
  f"&q={query}&num={num_results}"
40
  )
41
-
42
  try:
43
  res = requests.get(url, timeout=10)
44
  res.raise_for_status()
@@ -46,13 +21,11 @@ def search_google_news(keywords: list[str], num_results: int = 5):
46
  results = []
47
 
48
  for item in data.get("items", []):
49
- link = item.get("link")
50
- if is_trusted_domain(link, region):
51
- results.append({
52
- "title": item.get("title"),
53
- "link": link,
54
- "snippet": item.get("snippet"),
55
- })
56
 
57
  return results
58
  except Exception as e:
 
6
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
7
  GOOGLE_CX_ID = os.getenv("GOOGLE_CX_ID")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def search_google_news(keywords: list[str], num_results: int = 5):
10
  query = " ".join(keywords)
 
 
11
  url = (
12
  f"https://www.googleapis.com/customsearch/v1"
13
  f"?key={GOOGLE_API_KEY}&cx={GOOGLE_CX_ID}"
14
  f"&q={query}&num={num_results}"
15
  )
16
+
17
  try:
18
  res = requests.get(url, timeout=10)
19
  res.raise_for_status()
 
21
  results = []
22
 
23
  for item in data.get("items", []):
24
+ results.append({
25
+ "title": item.get("title"),
26
+ "link": item.get("link"),
27
+ "snippet": item.get("snippet"),
28
+ })
 
 
29
 
30
  return results
31
  except Exception as e: