raghavNCI committed on
Commit
826a1b8
·
1 Parent(s): aefa1e1

google search functionality

Browse files
Files changed (2) hide show
  1. nuse_modules/google_search.py +32 -0
  2. question.py +37 -26
nuse_modules/google_search.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # nuse_modules/google_search.py
2
+
3
+ import os
4
+ import requests
5
+
6
+ GOOGLE_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY")
7
+ GOOGLE_CX_ID = os.getenv("GOOGLE_CX_ID")
8
+
9
def search_google_news(keywords: list[str], num_results: int = 5):
    """Query the Google Custom Search JSON API for the given keywords.

    The keywords are joined with single spaces into one query string and
    sent together with the module-level ``GOOGLE_API_KEY`` and
    ``GOOGLE_CX_ID`` credentials.

    Args:
        keywords: Search terms; joined with spaces to form the ``q`` parameter.
        num_results: Number of results to request (sent as ``num``).

    Returns:
        On success, a list of dicts with the keys ``title``, ``link`` and
        ``snippet`` (a value is ``None`` when the API item lacks that field);
        the list is empty when the response has no ``items``.
        On any failure, a dict of the form ``{"error": "<message>"}``.
        Callers must check for the error dict before iterating the result.
    """
    query = " ".join(keywords)

    # Hand the parameters to requests instead of interpolating them into the
    # URL, so spaces, "&", "#", "+" and non-ASCII characters in the keywords
    # are percent-encoded rather than corrupting the query string.
    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CX_ID,
        "q": query,
        "num": num_results,
    }

    try:
        res = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            timeout=10,
        )
        res.raise_for_status()
        data = res.json()
        return [
            {
                "title": item.get("title"),
                "link": item.get("link"),
                "snippet": item.get("snippet"),
            }
            for item in data.get("items", [])
        ]
    except Exception as e:
        # Deliberately best-effort: every failure is reported as an error dict.
        # HTTP error messages embed the request URL, which contains the API
        # key, so strip literal occurrences of the key before returning the
        # text to callers (who may log it or send it to a client).
        message = str(e)
        if GOOGLE_API_KEY:
            message = message.replace(GOOGLE_API_KEY, "[REDACTED]")
        return {"error": message}
question.py CHANGED
@@ -10,6 +10,7 @@ from urllib.parse import quote
10
  import json
11
  from nuse_modules.classifier import classify_question, REVERSE_MAP
12
  from nuse_modules.keyword_extracter import keywords_extractor
 
13
 
14
  load_dotenv()
15
 
@@ -27,6 +28,9 @@ HEADERS = {
27
  "Content-Type": "application/json"
28
  }
29
 
 
 
 
30
  def is_relevant(article, keywords):
31
  text = f"{article.get('title', '')} {article.get('content', '')}".lower()
32
  return any(kw.lower() in text for kw in keywords)
@@ -79,40 +83,47 @@ async def ask_question(input: QuestionInput):
79
  print("Intent ID:", qid)
80
  print("Category:", REVERSE_MAP.get(qid, "unknown"))
81
 
82
- keywords = keywords_extractor(question)
83
 
84
- print("Raw extracted keywords:", keywords)
 
 
85
 
86
- if not keywords:
87
- return {"error": "Keyword extraction failed."}
 
 
88
 
89
- # Step 2: Fetch articles using AND, then fallback to OR
90
- query_and = " AND ".join(f'"{kw}"' for kw in keywords)
91
- articles = fetch_gnews_articles(query_and)
92
 
93
- if not articles:
94
- query_or = " OR ".join(f'"{kw}"' for kw in keywords)
95
- articles = fetch_gnews_articles(query_or)
96
 
97
- relevant_articles = [a for a in articles if is_relevant(a, keywords)]
 
 
98
 
99
- context = "\n\n".join([
100
- a.get("content") or ""
101
- for a in relevant_articles
102
- ])[:15000]
103
 
104
- if not context.strip():
105
- return {
106
- "question": question,
107
- "answer": "Cannot answer – no relevant context found.",
108
- "sources": []
109
- }
 
 
 
 
 
110
 
111
  # Step 3: Ask Mistral to answer using the context
112
  answer_prompt = (
113
  f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
114
  f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
115
- f"Context:\n{context}\n\n"
116
  f"Question: {question}\n\n"
117
  f"Answer:"
118
  )
@@ -125,8 +136,8 @@ async def ask_question(input: QuestionInput):
125
  return {
126
  "question": question,
127
  "answer": final_answer.strip(),
128
- "sources": [
129
- {"title": a["title"], "url": a["url"]}
130
- for a in relevant_articles
131
- ]
132
  }
 
10
  import json
11
  from nuse_modules.classifier import classify_question, REVERSE_MAP
12
  from nuse_modules.keyword_extracter import keywords_extractor
13
+ from nuse_modules.google_search import search_google_news
14
 
15
  load_dotenv()
16
 
 
28
  "Content-Type": "application/json"
29
  }
30
 
31
def should_extract_keywords(type_id: int) -> bool:
    """Tell whether keyword extraction should run for the given intent ID.

    Returns ``True`` only for the IDs 1-7 and 10; every other value yields
    ``False``.
    NOTE(review): presumably these IDs correspond to classifier categories
    (see ``REVERSE_MAP``) -- confirm against the classifier module.
    """
    keyword_intent_ids = frozenset((1, 2, 3, 4, 5, 6, 7, 10))
    if type_id in keyword_intent_ids:
        return True
    return False
33
+
34
  def is_relevant(article, keywords):
35
  text = f"{article.get('title', '')} {article.get('content', '')}".lower()
36
  return any(kw.lower() in text for kw in keywords)
 
83
  print("Intent ID:", qid)
84
  print("Category:", REVERSE_MAP.get(qid, "unknown"))
85
 
86
+ necessary = should_extract_keywords(qid)
87
 
88
+ if necessary:
89
+ keywords = keywords_extractor(question)
90
+ print("Raw extracted keywords:", keywords)
91
 
92
+ if not keywords:
93
+ return {"error": "Keyword extraction failed."}
94
+
95
+ results = search_google_news(keywords)
96
 
97
+ for r in results:
98
+ print(r["title"], r["link"])
 
99
 
100
+ # Step 2: Fetch articles using AND, then fallback to OR
101
+ # query_and = " AND ".join(f'"{kw}"' for kw in keywords)
102
+ # articles = fetch_gnews_articles(query_and)
103
 
104
+ # if not articles:
105
+ # query_or = " OR ".join(f'"{kw}"' for kw in keywords)
106
+ # articles = fetch_gnews_articles(query_or)
107
 
108
+ # relevant_articles = [a for a in articles if is_relevant(a, keywords)]
 
 
 
109
 
110
+ # context = "\n\n".join([
111
+ # a.get("content") or ""
112
+ # for a in relevant_articles
113
+ # ])[:15000]
114
+
115
+ # if not context.strip():
116
+ # return {
117
+ # "question": question,
118
+ # "answer": "Cannot answer – no relevant context found.",
119
+ # "sources": []
120
+ # }
121
 
122
  # Step 3: Ask Mistral to answer using the context
123
  answer_prompt = (
124
  f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
125
  f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
126
+ # f"Context:\n{context}\n\n"
127
  f"Question: {question}\n\n"
128
  f"Answer:"
129
  )
 
136
  return {
137
  "question": question,
138
  "answer": final_answer.strip(),
139
+ # "sources": [
140
+ # {"title": a["title"], "url": a["url"]}
141
+ # for a in relevant_articles
142
+ # ]
143
  }