import requests
import xml.etree.ElementTree as ET
from scholarly import scholarly


class DataLoader:
    def __init__(self, search_agent=None):
        # The optional search agent is used by fetch_arxiv_papers to suggest
        # related topics when the initial query returns fewer than 5 papers.
        self.search_agent = search_agent
        print("DataLoader Init")
def fetch_arxiv_papers(self, query, limit=None): # Updated signature
"""
Fetches top 5 research papers from ArXiv based on the user query.
If <5 papers are found, expands the search using related topics.
Returns:
list: A list of dictionaries containing paper details (title, summary, link).
"""
def search_arxiv(query):
"""Helper function to query ArXiv API."""
url = f"http://export.arxiv.org/api/query?search_query=all:{query}&start=0&max_results=5"
response = requests.get(url)
if response.status_code == 200:
root = ET.fromstring(response.text)
return [
{
"title": entry.find("{http://www.w3.org/2005/Atom}title").text,
"summary": entry.find("{http://www.w3.org/2005/Atom}summary").text,
"link": entry.find("{http://www.w3.org/2005/Atom}id").text
}
for entry in root.findall("{http://www.w3.org/2005/Atom}entry")
]
return []
papers = search_arxiv(query)
if len(papers) < 5 and self.search_agent: # If fewer than 5 papers, expand search
related_topics_response = self.search_agent.generate_reply(
messages=[{"role": "user", "content": f"Suggest 3 related research topics for '{query}'"}]
)
related_topics = related_topics_response.get("content", "").split("\n")
for topic in related_topics:
topic = topic.strip()
if topic and len(papers) < 5:
new_papers = search_arxiv(topic)
papers.extend(new_papers)
papers = papers[:5] # Ensure max 5 papers
if limit is not None:
papers = papers[:limit]
return papers

    def fetch_google_scholar_papers(self, query):
        """
        Fetches the top 5 research papers from Google Scholar.

        Returns:
            list: A list of dictionaries containing paper details (title, summary, link).
        """
        papers = []
        search_results = scholarly.search_pubs(query)
        for i, paper in enumerate(search_results):
            if i >= 5:
                break
            papers.append({
                "title": paper["bib"]["title"],
                "summary": paper["bib"].get("abstract", "No summary available"),
                "link": paper.get("pub_url", "No link available")
            })
        return papers
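

# Example usage: a minimal sketch of how DataLoader might be exercised. The
# query string below is a made-up example, and running this requires network
# access plus the `requests` and `scholarly` packages to be installed.
if __name__ == "__main__":
    loader = DataLoader()  # no search agent supplied, so the ArXiv expansion step is skipped
    arxiv_papers = loader.fetch_arxiv_papers("retrieval augmented generation", limit=3)
    scholar_papers = loader.fetch_google_scholar_papers("retrieval augmented generation")
    for paper in arxiv_papers + scholar_papers:
        print(f"{paper['title']}\n  {paper['link']}\n")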