pdx97 committed on
Commit
85e3933
·
verified ·
1 Parent(s): 23331b5

Updated app.py

Browse files

Stricter keyword search

Files changed (1) hide show
  1. app.py +78 -22
app.py CHANGED
@@ -5,12 +5,59 @@ import gradio as gr
5
  from smolagents import CodeAgent, HfApiModel, tool
6
 
7
  @tool
8
- def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  """Fetches the latest research papers from arXiv based on provided keywords.
10
 
11
  Args:
12
  keywords: A list of keywords to search for relevant papers.
13
- num_results: The number of papers to fetch (default is 3).
14
 
15
  Returns:
16
  A list of dictionaries containing:
@@ -21,33 +68,42 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
21
  - "link": A direct link to the paper on arXiv.
22
  """
23
  try:
24
- print(f"DEBUG: Searching arXiv papers with keywords: {keywords}") # Debug input
25
-
26
- #Properly format query with +AND+ for multiple keywords
27
- query = "+AND+".join([f"all:{kw}" for kw in keywords])
28
  query_encoded = urllib.parse.quote(query) # Encode spaces and special characters
29
-
30
- url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
31
-
32
- print(f"DEBUG: Query URL - {url}") # Debug URL
33
-
34
- feed = feedparser.parse(url)
35
 
 
 
 
 
 
36
  papers = []
 
37
  for entry in feed.entries:
38
- papers.append({
39
- "title": entry.title,
40
- "authors": ", ".join(author.name for author in entry.authors),
41
- "year": entry.published[:4], # Extract year
42
- "abstract": entry.summary,
43
- "link": entry.link
44
- })
45
 
46
- return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  except Exception as e:
49
- print(f"ERROR: {str(e)}") # Debug errors
50
- return [f"Error fetching research papers: {str(e)}"]
 
51
 
52
  # AI Model
53
  model = HfApiModel(
 
5
  from smolagents import CodeAgent, HfApiModel, tool
6
 
7
import feedparser
import urllib.parse


@tool
def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
    """Fetches the latest research papers from arXiv based on provided keywords.

    Builds a strict arXiv query requiring every keyword to appear in the
    title or abstract, over-fetches recent submissions, then filters and
    ranks them locally before returning the top matches.

    Args:
        keywords: A list of keywords to search for relevant papers.
        num_results: The number of papers to fetch (default is 5).

    Returns:
        A list of dictionaries containing:
        - "title": The title of the research paper.
        - "authors": The authors of the paper.
        - "year": The publication year.
        - "abstract": A summary of the research paper.
        - "link": A direct link to the paper on arXiv.
        On failure, a single-element list with an "error" key.
    """
    try:
        # An empty keyword list would produce an empty search_query;
        # return early instead of sending a malformed request.
        if not keywords:
            return []

        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")

        # Strict query: each keyword must appear in the title OR abstract,
        # and all keywords are ANDed together.
        query = "+AND+".join(f"ti:{kw}+OR+abs:{kw}" for kw in keywords)
        query_encoded = urllib.parse.quote(query)  # Encode spaces and special characters

        # Over-fetch candidates so that local filtering can still fill
        # num_results even when some entries are rejected. Previously this
        # was hard-coded to 20, silently capping num_results.
        fetch_count = max(num_results, 20)
        url = (
            "http://export.arxiv.org/api/query?"
            f"search_query={query_encoded}&start=0&max_results={fetch_count}"
            "&sortBy=submittedDate&sortOrder=descending"
        )
        print(f"DEBUG: Query URL - {url}")

        feed = feedparser.parse(url)
        candidates = [
            {
                "title": entry.title,
                "authors": ", ".join(author.name for author in entry.authors),
                "year": entry.published[:4],  # ISO timestamp starts with the year
                "abstract": entry.summary,
                "link": entry.link,
            }
            for entry in feed.entries
        ]
        return _rank_papers(candidates, keywords, num_results)

    except Exception as e:
        print(f"ERROR: {str(e)}")
        return [{"error": f"Error fetching research papers: {str(e)}"}]


def _rank_papers(papers: list, keywords: list, num_results: int) -> list:
    """Keep papers mentioning at least one keyword; rank title matches first.

    Args:
        papers: Paper dicts with "title" and "abstract" keys.
        keywords: Keywords to match (case-insensitive).
        num_results: Maximum number of papers to return.

    Returns:
        Up to num_results paper dicts, sorted by how many keywords appear
        in the title (descending); the sort is stable, so submission-date
        order from arXiv is preserved among ties.
    """
    kws = [kw.lower() for kw in keywords]
    matched = [
        p
        for p in papers
        if any(kw in p["title"].lower() or kw in p["abstract"].lower() for kw in kws)
    ]
    matched.sort(
        key=lambda p: sum(kw in p["title"].lower() for kw in kws),
        reverse=True,
    )
    return matched[:num_results]
107
 
108
  # AI Model
109
  model = HfApiModel(