Spaces:

nuseAI
/

FastAPI

Sleeping

App Files Files Community

raghavNCI committed on May 28

Commit

826a1b8

1 Parent(s): aefa1e1

google search functionality

Browse files

Files changed (2) hide show

nuse_modules/google_search.py +32 -0
question.py +37 -26

nuse_modules/google_search.py ADDED Viewed

	@@ -0,0 +1,32 @@

+# nuse_modules/google_search.py
+import os
+import requests
+GOOGLE_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY")
+GOOGLE_CX_ID = os.getenv("GOOGLE_CX_ID")
+def search_google_news(keywords: list[str], num_results: int = 5):
+    query = " ".join(keywords)
+    url = (
+        f"https://www.googleapis.com/customsearch/v1"
+        f"?key={GOOGLE_API_KEY}&cx={GOOGLE_CX_ID}"
+        f"&q={query}&num={num_results}"
+    )
+    try:
+        res = requests.get(url, timeout=10)
+        res.raise_for_status()
+        data = res.json()
+        results = []
+        for item in data.get("items", []):
+            results.append({
+                "title": item.get("title"),
+                "link": item.get("link"),
+                "snippet": item.get("snippet"),
+            })
+        return results
+    except Exception as e:
+        return {"error": str(e)}

question.py CHANGED Viewed

@@ -10,6 +10,7 @@ from urllib.parse import quote
 import json
 from nuse_modules.classifier import classify_question, REVERSE_MAP
 from nuse_modules.keyword_extracter import keywords_extractor
 load_dotenv()
@@ -27,6 +28,9 @@ HEADERS = {
     "Content-Type": "application/json"
 }
 def is_relevant(article, keywords):
     """Return True if any keyword occurs in the article's title or content.

     Matching is a case-insensitive substring test against the title and
     content joined by a single space.

     Args:
         article: Mapping read via ``.get("title")`` and ``.get("content")``.
         keywords: Iterable of keyword strings.

     Returns:
         True when at least one keyword is found, otherwise False (including
         when ``keywords`` is empty).
     """
     # A key that is present but holds None would otherwise be formatted as
     # the literal text "None" and falsely match keywords such as "none".
     title = article.get("title") or ""
     content = article.get("content") or ""
     text = f"{title} {content}".lower()
     return any(kw.lower() in text for kw in keywords)
@@ -79,40 +83,47 @@ async def ask_question(input: QuestionInput):
     print("Intent ID:", qid)
     print("Category:", REVERSE_MAP.get(qid, "unknown"))
-    keywords = keywords_extractor(question)
-    print("Raw extracted keywords:", keywords)
-    if not keywords:
-        return {"error": "Keyword extraction failed."}
-    # Step 2: Fetch articles using AND, then fallback to OR
-    query_and = " AND ".join(f'"{kw}"' for kw in keywords)
-    articles = fetch_gnews_articles(query_and)
-    if not articles:
-        query_or = " OR ".join(f'"{kw}"' for kw in keywords)
-        articles = fetch_gnews_articles(query_or)
-    relevant_articles = [a for a in articles if is_relevant(a, keywords)]
-    context = "\n\n".join([
-        a.get("content") or ""
-        for a in relevant_articles
-    ])[:15000]
-    if not context.strip():
-        return {
-            "question": question,
-            "answer": "Cannot answer – no relevant context found.",
-            "sources": []
-        }
     # Step 3: Ask Mistral to answer using the context
     answer_prompt = (
         f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
         f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
-        f"Context:\n{context}\n\n"
         f"Question: {question}\n\n"
         f"Answer:"
     )
@@ -125,8 +136,8 @@ async def ask_question(input: QuestionInput):
     return {
         "question": question,
         "answer": final_answer.strip(),
-        "sources": [
-            {"title": a["title"], "url": a["url"]}
-            for a in relevant_articles
-        ]
     }

 import json
 from nuse_modules.classifier import classify_question, REVERSE_MAP
 from nuse_modules.keyword_extracter import keywords_extractor
+from nuse_modules.google_search import search_google_news
 load_dotenv()
     "Content-Type": "application/json"
 }
+def should_extract_keywords(type_id: int) -> bool:
+    return type_id in {1, 2, 3, 4, 5, 6, 7, 10}
 def is_relevant(article, keywords):
     """Return True if any keyword occurs in the article's title or content.

     Matching is a case-insensitive substring test against the title and
     content joined by a single space.

     Args:
         article: Mapping read via ``.get("title")`` and ``.get("content")``.
         keywords: Iterable of keyword strings.

     Returns:
         True when at least one keyword is found, otherwise False (including
         when ``keywords`` is empty).
     """
     # A key that is present but holds None would otherwise be formatted as
     # the literal text "None" and falsely match keywords such as "none".
     title = article.get("title") or ""
     content = article.get("content") or ""
     text = f"{title} {content}".lower()
     return any(kw.lower() in text for kw in keywords)
     print("Intent ID:", qid)
     print("Category:", REVERSE_MAP.get(qid, "unknown"))
+    necessary = should_extract_keywords(qid)
+    if necessary:
+        keywords = keywords_extractor(question)
+        print("Raw extracted keywords:", keywords)
+        if not keywords:
+            return {"error": "Keyword extraction failed."}
+        results = search_google_news(keywords)
+        for r in results:
+            print(r["title"], r["link"])
+        # Step 2: Fetch articles using AND, then fallback to OR
+        # query_and = " AND ".join(f'"{kw}"' for kw in keywords)
+        # articles = fetch_gnews_articles(query_and)
+        # if not articles:
+        #     query_or = " OR ".join(f'"{kw}"' for kw in keywords)
+        #     articles = fetch_gnews_articles(query_or)
+        # relevant_articles = [a for a in articles if is_relevant(a, keywords)]
+        # context = "\n\n".join([
+        #     a.get("content") or ""
+        #     for a in relevant_articles
+        # ])[:15000]
+        # if not context.strip():
+        #     return {
+        #         "question": question,
+        #         "answer": "Cannot answer – no relevant context found.",
+        #         "sources": []
+        #     }
     # Step 3: Ask Mistral to answer using the context
     answer_prompt = (
         f"You are a concise news assistant. Answer the user's question clearly using the context below if relevant. "
         f"If the context is not helpful, you may rely on your own knowledge, but do not mention the context or question again.\n\n"
+        # f"Context:\n{context}\n\n"
         f"Question: {question}\n\n"
         f"Answer:"
     )
     return {
         "question": question,
         "answer": final_answer.strip(),
+        # "sources": [
+        #     {"title": a["title"], "url": a["url"]}
+        #     for a in relevant_articles
+        # ]
     }