raghavNCI committed on
Commit
2c39b8a
·
1 Parent(s): 43cf665

changes v18

Browse files
Files changed (1) hide show
  1. question.py +15 -4
question.py CHANGED
@@ -25,14 +25,25 @@ HEADERS = {
25
  "Content-Type": "application/json"
26
  }
27
 
28
def extract_last_keywords(raw: str, max_keywords=6):
    """Return the comma-separated keywords from the last usable line of *raw*.

    Lines are scanned from the bottom up. Each line is split on commas and
    every token is stripped of surrounding whitespace and double quotes.
    The first line that yields between 1 and ``max_keywords`` non-empty
    keywords wins.

    Args:
        raw: Multi-line text to scan.
        max_keywords: Largest number of keywords a line may contain and
            still be accepted.

    Returns:
        The list of keywords from the accepted line, or ``[]`` when no
        line qualifies.
    """
    for line in reversed(raw.strip().split("\n")):
        parts = []
        for token in line.split(","):
            keyword = token.strip().strip('"').strip()
            # A token such as `""` is non-empty before unquoting but empty
            # afterwards; keeping it would produce a keyword that is a
            # substring of every text.
            if keyword:
                parts.append(keyword)
        if 1 <= len(parts) <= max_keywords:
            return parts
    return []
35
 
 
36
def is_relevant(article, keywords):
    """Return True if any keyword occurs in the article's title or content.

    Matching is a case-insensitive substring test against the title and
    content joined by a single space.

    Args:
        article: Mapping read via ``.get`` for the ``title`` and
            ``content`` keys; either may be missing or ``None``.
        keywords: Iterable of keyword strings.

    Returns:
        ``True`` when at least one non-blank keyword is found, otherwise
        ``False``.
    """
    # `.get(key, '')` still returns None when the key exists with a null
    # value, which would put the literal text "None" into the haystack.
    title = article.get("title") or ""
    content = article.get("content") or ""
    text = f"{title} {content}".lower()
    # A blank keyword is a substring of every text, so it must not count
    # as a match.
    return any(kw.lower() in text for kw in keywords if kw and kw.strip())
 
25
  "Content-Type": "application/json"
26
  }
27
 
28
def extract_last_keywords(raw: str, max_keywords=8):
    """Return the keyword list found on the last suitable line of *raw*.

    Lines are scanned from the bottom up. A line is accepted when it:

    * is at least 10 characters long after stripping,
    * does not start with "extract" (case-insensitive),
    * contains at least two commas, and
    * splits into between 1 and ``max_keywords`` non-empty keywords, each
      of at most three whitespace-separated words.

    Args:
        raw: Multi-line text to scan.
        max_keywords: Largest number of keywords a line may contain and
            still be accepted.

    Returns:
        The keywords from the accepted line with surrounding whitespace
        and double quotes removed, or ``[]`` when no line qualifies.
    """
    for line in reversed(raw.strip().split("\n")):
        line = line.strip()

        # The length check also rejects empty lines; lines starting with
        # "extract" are treated as prompt text rather than an answer.
        if len(line) < 10 or line.lower().startswith("extract"):
            continue

        # Require several comma-separated items before treating the line
        # as a keyword list.
        if line.count(",") < 2:
            continue

        parts = []
        for token in line.split(","):
            keyword = token.strip().strip('"').strip()
            # A token such as `""` is empty once unquoted; keeping it would
            # yield a keyword that is a substring of every text.
            if keyword:
                parts.append(keyword)

        # Reject lines whose items are long phrases or sentence fragments.
        if 1 <= len(parts) <= max_keywords and all(
            len(part.split()) <= 3 for part in parts
        ):
            return parts

    return []
45
 
46
+
47
def is_relevant(article, keywords):
    """Return True if any keyword occurs in the article's title or content.

    Matching is a case-insensitive substring test against the title and
    content joined by a single space.

    Args:
        article: Mapping read via ``.get`` for the ``title`` and
            ``content`` keys; either may be missing or ``None``.
        keywords: Iterable of keyword strings.

    Returns:
        ``True`` when at least one non-blank keyword is found, otherwise
        ``False``.
    """
    # `.get(key, '')` still returns None when the key exists with a null
    # value, which would put the literal text "None" into the haystack.
    title = article.get("title") or ""
    content = article.get("content") or ""
    text = f"{title} {content}".lower()
    # A blank keyword is a substring of every text, so it must not count
    # as a match.
    return any(kw.lower() in text for kw in keywords if kw and kw.strip())