pdx97 committed on
Commit
85e3933
·
verified ·
1 Parent(s): 23331b5

Updated app.py

Browse files

Stricter keyword search

Files changed (1) hide show
  1. app.py +78 -22
app.py CHANGED
@@ -5,12 +5,59 @@ import gradio as gr
5
  from smolagents import CodeAgent, HfApiModel, tool
6
 
7
  @tool
8
- def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  """Fetches the latest research papers from arXiv based on provided keywords.
10
 
11
  Args:
12
  keywords: A list of keywords to search for relevant papers.
13
- num_results: The number of papers to fetch (default is 3).
14
 
15
  Returns:
16
  A list of dictionaries containing:
@@ -21,33 +68,42 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
21
  - "link": A direct link to the paper on arXiv.
22
  """
23
  try:
24
- print(f"DEBUG: Searching arXiv papers with keywords: {keywords}") # Debug input
25
-
26
- #Properly format query with +AND+ for multiple keywords
27
- query = "+AND+".join([f"all:{kw}" for kw in keywords])
28
  query_encoded = urllib.parse.quote(query) # Encode spaces and special characters
29
-
30
- url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
31
-
32
- print(f"DEBUG: Query URL - {url}") # Debug URL
33
-
34
- feed = feedparser.parse(url)
35
 
 
 
 
 
 
36
  papers = []
 
37
  for entry in feed.entries:
38
- papers.append({
39
- "title": entry.title,
40
- "authors": ", ".join(author.name for author in entry.authors),
41
- "year": entry.published[:4], # Extract year
42
- "abstract": entry.summary,
43
- "link": entry.link
44
- })
45
 
46
- return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  except Exception as e:
49
- print(f"ERROR: {str(e)}") # Debug errors
50
- return [f"Error fetching research papers: {str(e)}"]
 
51
 
52
  # AI Model
53
  model = HfApiModel(
 
5
  from smolagents import CodeAgent, HfApiModel, tool
6
 
7
import feedparser
import urllib.parse


@tool
def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
    """Fetches the latest research papers from arXiv based on provided keywords.

    Builds a strict arXiv query requiring every keyword to appear in the
    title or abstract, over-fetches recent submissions, then filters and
    ranks them locally before returning the top matches.

    Args:
        keywords: A list of keywords to search for relevant papers.
        num_results: The number of papers to fetch (default is 5).

    Returns:
        A list of dictionaries containing:
        - "title": The title of the research paper.
        - "authors": The authors of the paper.
        - "year": The publication year.
        - "abstract": A summary of the research paper.
        - "link": A direct link to the paper on arXiv.
        On failure, a single-element list with an "error" key.
    """
    try:
        # An empty keyword list would produce an empty search_query;
        # return early instead of sending a malformed request.
        if not keywords:
            return []

        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")

        # Strict query: each keyword must appear in the title OR abstract,
        # and all keywords are ANDed together.
        query = "+AND+".join(f"ti:{kw}+OR+abs:{kw}" for kw in keywords)
        query_encoded = urllib.parse.quote(query)  # Encode spaces and special characters

        # Over-fetch candidates so that local filtering can still fill
        # num_results even when some entries are rejected. Previously this
        # was hard-coded to 20, silently capping num_results.
        fetch_count = max(num_results, 20)
        url = (
            "http://export.arxiv.org/api/query?"
            f"search_query={query_encoded}&start=0&max_results={fetch_count}"
            "&sortBy=submittedDate&sortOrder=descending"
        )
        print(f"DEBUG: Query URL - {url}")

        feed = feedparser.parse(url)
        candidates = [
            {
                "title": entry.title,
                "authors": ", ".join(author.name for author in entry.authors),
                "year": entry.published[:4],  # ISO timestamp starts with the year
                "abstract": entry.summary,
                "link": entry.link,
            }
            for entry in feed.entries
        ]
        return _rank_papers(candidates, keywords, num_results)

    except Exception as e:
        print(f"ERROR: {str(e)}")
        return [{"error": f"Error fetching research papers: {str(e)}"}]


def _rank_papers(papers: list, keywords: list, num_results: int) -> list:
    """Keep papers mentioning at least one keyword; rank title matches first.

    Args:
        papers: Paper dicts with "title" and "abstract" keys.
        keywords: Keywords to match (case-insensitive).
        num_results: Maximum number of papers to return.

    Returns:
        Up to num_results paper dicts, sorted by how many keywords appear
        in the title (descending); the sort is stable, so submission-date
        order from arXiv is preserved among ties.
    """
    kws = [kw.lower() for kw in keywords]
    matched = [
        p
        for p in papers
        if any(kw in p["title"].lower() or kw in p["abstract"].lower() for kw in kws)
    ]
    matched.sort(
        key=lambda p: sum(kw in p["title"].lower() for kw in kws),
        reverse=True,
    )
    return matched[:num_results]
107
 
108
  # AI Model
109
  model = HfApiModel(