raghavNCI committed on
Commit
587894c
·
1 Parent(s): 9ff5d3a

changes v13

Browse files
Files changed (1) hide show
  1. question.py +17 -6
question.py CHANGED
@@ -6,7 +6,7 @@ from pydantic import BaseModel
6
  from typing import List
7
  from redis_client import redis_client as r
8
  from dotenv import load_dotenv
9
- from urllib.parse import quote_plus
10
  import json
11
 
12
  load_dotenv()
@@ -25,6 +25,15 @@ HEADERS = {
25
  "Content-Type": "application/json"
26
  }
27
 
 
 
 
 
 
 
 
 
 
28
  def mistral_generate(prompt: str, max_new_tokens=128):
29
  payload = {
30
  "inputs": prompt,
@@ -57,17 +66,19 @@ async def ask_question(input: QuestionInput):
57
 
58
  raw_keywords = mistral_generate(keyword_prompt, max_new_tokens=32)
59
 
60
- print("Raw extracted keywords:", raw_keywords)
 
 
61
 
62
- if not raw_keywords:
63
  return {"error": "Keyword extraction failed."}
64
 
 
65
  # Clean and parse keywords
66
- keywords = [kw.strip().strip('"') for kw in raw_keywords.split(",") if kw.strip()]
67
  query_string = " OR ".join(f'"{kw}"' for kw in keywords)
68
- encoded_query = quote_plus(query_string)
69
-
70
  gnews_url = f"https://gnews.io/api/v4/search?q={encoded_query}&lang=en&max=3&expand=content&token={GNEWS_API_KEY}"
 
71
  print("GNews URL:", gnews_url)
72
 
73
  try:
 
6
  from typing import List
7
  from redis_client import redis_client as r
8
  from dotenv import load_dotenv
9
+ from urllib.parse import quote
10
  import json
11
 
12
  load_dotenv()
 
25
  "Content-Type": "application/json"
26
  }
27
 
28
def extract_last_keywords(raw: str, max_keywords: int = 6) -> List[str]:
    """Return the keywords from the last usable line of *raw*.

    The text is scanned line by line, starting from the end. Each line is
    split on commas, and every piece has surrounding whitespace and double
    quotes removed. The first line (from the bottom) that yields between 1
    and ``max_keywords`` non-empty keywords wins.

    Args:
        raw: Text to parse. Anything that is not a string (for example
            ``None``) is treated as "no keywords" instead of raising.
        max_keywords: Largest number of keywords a line may contain and
            still be accepted.

    Returns:
        The cleaned keywords of the selected line, or an empty list when no
        line qualifies.
    """
    # Guard against a missing/failed generation result so the caller's
    # ``if not keywords`` check handles it instead of an AttributeError.
    if not isinstance(raw, str):
        return []

    for line in reversed(raw.strip().splitlines()):
        # Strip again after removing quotes so '" foo "' becomes 'foo', and
        # drop pieces that are empty once cleaned (e.g. a bare '""').
        cleaned = (piece.strip().strip('"').strip() for piece in line.split(","))
        parts = [keyword for keyword in cleaned if keyword]
        if 1 <= len(parts) <= max_keywords:
            return parts
    return []
35
+
36
+
37
  def mistral_generate(prompt: str, max_new_tokens=128):
38
  payload = {
39
  "inputs": prompt,
 
66
 
67
  raw_keywords = mistral_generate(keyword_prompt, max_new_tokens=32)
68
 
69
+ keywords = extract_last_keywords(raw_keywords)
70
+
71
+ print("Raw extracted keywords:", keywords)
72
 
73
+ if not keywords:
74
  return {"error": "Keyword extraction failed."}
75
 
76
+
77
  # Clean and parse keywords
 
78
  query_string = " OR ".join(f'"{kw}"' for kw in keywords)
79
+ encoded_query = quote(query_string)
 
80
  gnews_url = f"https://gnews.io/api/v4/search?q={encoded_query}&lang=en&max=3&expand=content&token={GNEWS_API_KEY}"
81
+
82
  print("GNews URL:", gnews_url)
83
 
84
  try: