# ScholarAgent
#
# Running
#
# File size: 4,303 Bytes

from smolagents import CodeAgent, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from scholarly import scholarly
import gradio as gr

@tool
def fetch_latest_research_papers(keywords: list, num_results: int = 1) -> list:
    """Fetches research papers from Google Scholar for the given keywords, newest first.

    Args:
        keywords: A list of keywords to search for relevant papers. A single string is treated as one keyword.
        num_results: The maximum number of papers to fetch (default is 1).

    Returns:
        A list of dicts with the keys "title", "authors", "year", "abstract"
        and "link", sorted by publication year in descending order. If
        anything goes wrong, a one-element list holding an error message
        string is returned instead of raising.
    """
    try:
        print(f"DEBUG: Searching papers with keywords: {keywords}")  # Debug input

        # A bare string would otherwise be iterated character by character
        # and produce a garbled query such as "d e e p".
        if isinstance(keywords, str):
            keywords = [keywords]

        query = " ".join(kw.strip() for kw in keywords if kw.strip())
        if not query or num_results <= 0:
            # Nothing meaningful to ask for; skip the network round trip.
            return []

        search_results = scholarly.search_pubs(query)
        papers = []
        for _ in range(num_results):
            paper = next(search_results, None)
            if paper is None:
                break  # The search ran out of results before num_results.

            scholarly.fill(paper)  # Fetch additional metadata
            bib = paper['bib']
            title = bib.get('title', 'No Title')
            pub_year = bib.get('pub_year', 'Unknown Year')

            # Normalise the year to an int so the sort below can compare it;
            # unparsable values fall back to 0 and therefore sort last.
            if pub_year != 'Unknown Year':
                try:
                    pub_year = int(pub_year)
                except (TypeError, ValueError):
                    pub_year = 0

            print(f"DEBUG: Found paper - {title} ({pub_year})")

            papers.append({
                "title": title,
                "authors": bib.get('author', 'Unknown Authors'),
                "year": pub_year,
                "abstract": bib.get('abstract', 'No Abstract Available'),
                "link": paper.get('pub_url', 'No Link Available'),
            })

        # Newest first; entries without a usable integer year are ranked as 0.
        return sorted(
            papers,
            key=lambda entry: entry["year"] if isinstance(entry["year"], int) else 0,
            reverse=True,
        )

    except Exception as e:
        # Boundary handler: callers expect a list back, never an exception.
        print(f"ERROR: {str(e)}")  # Debug errors
        return [f"Error fetching research papers: {str(e)}"]


# Tool instance handed to the agent alongside the paper-fetching tool.
final_answer = FinalAnswerTool()

# Language model configuration used by the agent.
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's default locale encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# Agent wired up with the model, both tools and the loaded prompt templates.
agent = CodeAgent(
    model=model,
    tools=[final_answer, fetch_latest_research_papers],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name="ScholarAgent",
    description="An AI agent that fetches the latest research papers from Google Scholar based on user-defined keywords and filters.",
    prompt_templates=prompt_templates
)

def search_papers(user_input):
    """Turn a comma-separated keyword string into a Markdown result listing.

    Args:
        user_input: Text typed by the user; keywords are separated by commas.
            ``None`` is treated like an empty string.

    Returns:
        A Markdown string: one section per paper on success, the error
        message produced by ``fetch_latest_research_papers`` when the lookup
        failed, or a fixed hint when there are no keywords or no results.
    """
    raw_text = user_input or ""
    keywords = [kw.strip() for kw in raw_text.split(",") if kw.strip()]  # Ensure valid keywords
    print(f"DEBUG: Received input keywords - {keywords}")  # Debug user input

    if not keywords:
        print("DEBUG: No valid keywords provided.")
        return "Error: Please enter at least one valid keyword."

    results = fetch_latest_research_papers(keywords, num_results=1)
    print(f"DEBUG: Results received - {results}")  # Debug function output

    if isinstance(results, list) and results:
        first = results[0]

        if isinstance(first, dict):
            sections = []
            for paper in results:
                authors = paper['authors']
                # The authors field may arrive as a list of names; join it so
                # the user does not see a Python list repr.
                if isinstance(authors, (list, tuple)):
                    authors = ", ".join(str(name) for name in authors)
                sections.append(
                    f"**Title:** {paper['title']}\n"
                    f"**Authors:** {authors}\n"
                    f"**Year:** {paper['year']}\n"
                    f"**Abstract:** {paper['abstract']}\n"
                    f"[Read More]({paper['link']})"
                )
            return "\n\n".join(sections)

        if isinstance(first, str):
            # The fetch tool reports failures as a list holding one message;
            # show it instead of pretending the search came back empty.
            print("DEBUG: Fetch reported an error.")
            return first

    print("DEBUG: No results found.")
    return "No results found. Try different keywords."


# Create a simple Gradio interface: a page title, one keyword text box, a
# Markdown area for the results and a button that triggers the search.
with gr.Blocks() as demo:
    gr.Markdown("# Google Scholar Research Paper Fetcher")
    keyword_input = gr.Textbox(label="Enter keywords (comma-separated)", placeholder="e.g., deep learning, reinforcement learning")
    output_display = gr.Markdown()
    search_button = gr.Button("Search")
    
    # Clicking the button passes the text box content to search_papers and
    # writes the returned Markdown string into output_display.
    search_button.click(search_papers, inputs=[keyword_input], outputs=[output_display])

    
    # NOTE(review): this message is printed while the layout is being built,
    # i.e. before demo.launch() below has been called.
    print("DEBUG: Gradio UI is running. Waiting for user input...")

# Start the app; this runs unconditionally whenever the module is executed.
demo.launch()