Updated app.py
Stricter keyword search
app.py
CHANGED
@@ -5,12 +5,59 @@ import gradio as gr
 from smolagents import CodeAgent, HfApiModel, tool
 
 @tool
-def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
+# def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
+#     """Fetches the latest research papers from arXiv based on provided keywords.
+
+#     Args:
+#         keywords: A list of keywords to search for relevant papers.
+#         num_results: The number of papers to fetch (default is 3).
+
+#     Returns:
+#         A list of dictionaries containing:
+#         - "title": The title of the research paper.
+#         - "authors": The authors of the paper.
+#         - "year": The publication year.
+#         - "abstract": A summary of the research paper.
+#         - "link": A direct link to the paper on arXiv.
+#     """
+#     try:
+#         print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")  # Debug input
+
+#         #Properly format query with +AND+ for multiple keywords
+#         query = "+AND+".join([f"all:{kw}" for kw in keywords])
+#         query_encoded = urllib.parse.quote(query)  # Encode spaces and special characters
+
+#         url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
+
+#         print(f"DEBUG: Query URL - {url}")  # Debug URL
+
+#         feed = feedparser.parse(url)
+
+#         papers = []
+#         for entry in feed.entries:
+#             papers.append({
+#                 "title": entry.title,
+#                 "authors": ", ".join(author.name for author in entry.authors),
+#                 "year": entry.published[:4],  # Extract year
+#                 "abstract": entry.summary,
+#                 "link": entry.link
+#             })
+
+#         return papers
+
+#     except Exception as e:
+#         print(f"ERROR: {str(e)}")  # Debug errors
+#         return [f"Error fetching research papers: {str(e)}"]
+
+import feedparser
+import urllib.parse
+
+def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
     """Fetches the latest research papers from arXiv based on provided keywords.
 
     Args:
         keywords: A list of keywords to search for relevant papers.
-        num_results: The number of papers to fetch (default is 3).
+        num_results: The number of papers to fetch (default is 5).
 
     Returns:
         A list of dictionaries containing:
@@ -21,33 +68,42 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
         - "link": A direct link to the paper on arXiv.
     """
     try:
-        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")  # Debug input
-
-        #Properly format query with +AND+ for multiple keywords
-        query = "+AND+".join([f"all:{kw}" for kw in keywords])
+        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")
+
+        # Format query using "AND" to enforce strict keyword presence
+        query = "+AND+".join([f"ti:{kw}+OR+abs:{kw}" for kw in keywords])
         query_encoded = urllib.parse.quote(query)  # Encode spaces and special characters
 
-        url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
-
-        print(f"DEBUG: Query URL - {url}")  # Debug URL
-
-        feed = feedparser.parse(url)
 
+        url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=20&sortBy=submittedDate&sortOrder=descending"
+
+        print(f"DEBUG: Query URL - {url}")
+
+        feed = feedparser.parse(url)
         papers = []
+
         for entry in feed.entries:
-            papers.append({
-                "title": entry.title,
-                "authors": ", ".join(author.name for author in entry.authors),
-                "year": entry.published[:4],  # Extract year
-                "abstract": entry.summary,
-                "link": entry.link
-            })
+            title = entry.title.lower()
+            abstract = entry.summary.lower()
 
-        return papers
+            # Ensure at least one keyword appears in the title or abstract
+            if any(kw.lower() in title or kw.lower() in abstract for kw in keywords):
+                papers.append({
+                    "title": entry.title,
+                    "authors": ", ".join(author.name for author in entry.authors),
+                    "year": entry.published[:4],  # Extract year
+                    "abstract": entry.summary,
+                    "link": entry.link
+                })
+
+        #Sort papers: First prioritize keyword in title, then abstract
+        papers.sort(key=lambda x: sum(kw.lower() in x["title"].lower() for kw in keywords), reverse=True)
+
+        return papers[:num_results]  # Return top-matching papers
 
     except Exception as e:
-        print(f"ERROR: {str(e)}")  # Debug errors
-        return [f"Error fetching research papers: {str(e)}"]
+        print(f"ERROR: {str(e)}")
+        return [{"error": f"Error fetching research papers: {str(e)}"}]
+
 
 # AI Model
 model = HfApiModel(
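A note on what this commit changes, for anyone skimming the diff: the old query matched each keyword against the all: field, while the new one requires every keyword to hit the title (ti:) or abstract (abs:) field, over-fetches 20 results, drops entries where no keyword literally appears in the title or abstract, sorts the rest by how many keywords the title contains, and only then truncates to num_results. Below is a minimal, self-contained sketch of the query construction and the sort key, mirroring the new code above; the keywords list and the two paper titles are made-up examples, not taken from the Space.

import urllib.parse

# Hypothetical input, for illustration only.
keywords = ["diffusion", "robotics"]

# Same construction as the updated fetch_latest_arxiv_papers: each keyword
# must match the title (ti:) or abstract (abs:), clauses joined with AND.
query = "+AND+".join([f"ti:{kw}+OR+abs:{kw}" for kw in keywords])
print(query)
# ti:diffusion+OR+abs:diffusion+AND+ti:robotics+OR+abs:robotics

# quote() also percent-encodes ':' and the '+' separators (%3A, %2B),
# so the final URL depends on the arXiv API tolerating that encoding.
print(urllib.parse.quote(query))

# The sort key counts keyword hits in the title, so titles matching more
# keywords float to the front (reverse=True); Python's sort is stable.
papers = [
    {"title": "A Survey of Diffusion Models"},     # 1 keyword in title
    {"title": "Diffusion Policies for Robotics"},  # 2 keywords in title
]
papers.sort(key=lambda x: sum(kw.lower() in x["title"].lower() for kw in keywords), reverse=True)
print([p["title"] for p in papers])
# ['Diffusion Policies for Robotics', 'A Survey of Diffusion Models']

Two caveats worth flagging, hedged since only this diff is visible here: the ti:/abs: pairs are not parenthesized, so grouping of the mixed AND/OR expression is left to the arXiv API's operator precedence (the API accepts %28 ... %29 for explicit grouping); and the unchanged @tool decorator at line 7 now sits directly above the commented-out block and the import feedparser statement, which Python rejects as a syntax error (a decorator must be followed by a def or class), so the decorator presumably needs to move down to the new function definition.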