pdx97 committed on
Commit
6cdbdc2
·
verified ·
1 Parent(s): f0e61d0

Updated fetch function in app.py

Browse files

Force strict keyword search using "title" and "abstract"

Files changed (1) hide show
  1. app.py +81 -24
app.py CHANGED
@@ -4,13 +4,58 @@ import yaml
4
  import gradio as gr
5
  from smolagents import CodeAgent, HfApiModel, tool
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  @tool
8
- def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
9
- """Fetches the latest research papers from arXiv based on provided keywords.
10
 
11
  Args:
12
  keywords: A list of keywords to search for relevant papers.
13
- num_results: The number of papers to fetch (default is 3).
14
 
15
  Returns:
16
  A list of dictionaries containing:
@@ -21,33 +66,45 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
21
  - "link": A direct link to the paper on arXiv.
22
  """
23
  try:
24
- print(f"DEBUG: Searching arXiv papers with keywords: {keywords}") # Debug input
25
-
26
- #Properly format query with +AND+ for multiple keywords
27
- query = "+AND+".join([f"all:{kw}" for kw in keywords])
28
- query_encoded = urllib.parse.quote(query) # Encode spaces and special characters
29
-
30
- url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
31
-
32
- print(f"DEBUG: Query URL - {url}") # Debug URL
33
-
34
- feed = feedparser.parse(url)
35
 
 
36
  papers = []
 
37
  for entry in feed.entries:
38
- papers.append({
39
- "title": entry.title,
40
- "authors": ", ".join(author.name for author in entry.authors),
41
- "year": entry.published[:4], # Extract year
42
- "abstract": entry.summary,
43
- "link": entry.link
44
- })
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- return papers
47
 
48
  except Exception as e:
49
- print(f"ERROR: {str(e)}") # Debug errors
50
- return [f"Error fetching research papers: {str(e)}"]
51
 
52
 
53
 
 
4
  import gradio as gr
5
  from smolagents import CodeAgent, HfApiModel, tool
6
 
7
+ # @tool
8
+ # def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
9
+ # """Fetches the latest research papers from arXiv based on provided keywords.
10
+
11
+ # Args:
12
+ # keywords: A list of keywords to search for relevant papers.
13
+ # num_results: The number of papers to fetch (default is 3).
14
+
15
+ # Returns:
16
+ # A list of dictionaries containing:
17
+ # - "title": The title of the research paper.
18
+ # - "authors": The authors of the paper.
19
+ # - "year": The publication year.
20
+ # - "abstract": A summary of the research paper.
21
+ # - "link": A direct link to the paper on arXiv.
22
+ # """
23
+ # try:
24
+ # print(f"DEBUG: Searching arXiv papers with keywords: {keywords}") # Debug input
25
+
26
+ # #Properly format query with +AND+ for multiple keywords
27
+ # query = "+AND+".join([f"all:{kw}" for kw in keywords])
28
+ # query_encoded = urllib.parse.quote(query) # Encode spaces and special characters
29
+
30
+ # url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
31
+
32
+ # print(f"DEBUG: Query URL - {url}") # Debug URL
33
+
34
+ # feed = feedparser.parse(url)
35
+
36
+ # papers = []
37
+ # for entry in feed.entries:
38
+ # papers.append({
39
+ # "title": entry.title,
40
+ # "authors": ", ".join(author.name for author in entry.authors),
41
+ # "year": entry.published[:4], # Extract year
42
+ # "abstract": entry.summary,
43
+ # "link": entry.link
44
+ # })
45
+
46
+ # return papers
47
+
48
+ # except Exception as e:
49
+ # print(f"ERROR: {str(e)}") # Debug errors
50
+ # return [f"Error fetching research papers: {str(e)}"]
51
+
52
@tool
def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
    """Fetches the latest research papers from arXiv based on **strict keyword presence**.

    Every keyword must appear (case-insensitively) in either the title or the
    abstract of a paper for it to be included in the results.

    Args:
        keywords: A list of keywords to search for relevant papers.
        num_results: The number of papers to fetch (default is 5).

    Returns:
        A list of dictionaries containing:
            - "title": The title of the research paper.
            - "authors": The authors of the paper.
            - "year": The publication year.
            - "abstract": A summary of the research paper.
            - "link": A direct link to the paper on arXiv.
        If nothing matches or an error occurs, a single-element list holding a
        dict with an "error" key is returned instead.
    """
    try:
        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")

        # Force strict keyword search using the "title" (ti:) and "abstract"
        # (abs:) fields only, quoting each keyword as an exact phrase.
        query = "+AND+".join([f"(ti:\"{kw}\"+OR+abs:\"{kw}\")" for kw in keywords])
        # BUG FIX: quote() must keep '+' literal. The arXiv API uses '+' as the
        # space/operator separator; the previous quote(query) encoded it to
        # %2B, which corrupts the boolean query string.
        query_encoded = urllib.parse.quote(query, safe="+")

        # Over-fetch candidates so the strict local filter below can still fill
        # num_results. Previously hard-coded to 20, which silently capped
        # larger num_results values; max(20, num_results) is backward-compatible.
        fetch_count = max(20, num_results)
        url = (
            "http://export.arxiv.org/api/query"
            f"?search_query={query_encoded}&start=0&max_results={fetch_count}"
            "&sortBy=submittedDate&sortOrder=descending"
        )

        print(f"DEBUG: Query URL - {url}")

        feed = feedparser.parse(url)
        papers = []

        for entry in feed.entries:
            title = entry.title.lower()
            abstract = entry.summary.lower()

            # Ensure ALL keywords appear in **either** title or abstract.
            if all(kw.lower() in title or kw.lower() in abstract for kw in keywords):
                papers.append({
                    "title": entry.title,
                    "authors": ", ".join(author.name for author in entry.authors),
                    "year": entry.published[:4],  # Extract year
                    "abstract": entry.summary,
                    "link": entry.link,
                })

        # If no relevant papers found, return "No results found."
        if not papers:
            return [{"error": "No results found. Try different keywords."}]

        # Prioritize papers where keywords appear in the **title**.
        papers.sort(
            key=lambda p: sum(kw.lower() in p["title"].lower() for kw in keywords),
            reverse=True,
        )

        return papers[:num_results]  # Return top-matching papers

    except Exception as e:
        print(f"ERROR: {str(e)}")
        return [{"error": f"Error fetching research papers: {str(e)}"}]
108
 
109
 
110