updated UI and added CSS
- .gitignore +2 -1
- LICENSE +21 -0
- README.md +50 -3
- agent.py +35 -4
- agent_langgraph.py +0 -399
- app.py +275 -156
- static/custom.css +478 -0
- tools.py +5 -3
.gitignore
CHANGED
@@ -15,4 +15,5 @@ __pycache__/*
 *.pyo
 *.pyd
 
-TEMPP/*
+TEMPP/*
+test.txt
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Vividh Mahajan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
-title:
-emoji:
+title: ScholarAI
+emoji: 🎓
 colorFrom: yellow
 colorTo: red
 sdk: gradio
@@ -11,4 +11,51 @@ hf_oauth: true
 hf_oauth_expiration_minutes: 480
 ---
 
-
+# ScholarAI 🎓
+
+[](https://huggingface.co/spaces/Lasdw/ScholarAI)
+[](https://opensource.org/licenses/MIT)
+[](https://huggingface.co/spaces/Lasdw/ScholarAI)
+[](https://www.python.org/downloads/)
+[](https://gradio.app/)
+[](https://github.com/Lasdw/ScholarAI)
+
+An AI-powered research assistant that helps you find answers by searching the web, analyzing images, processing audio, and more.
+
+## Features
+
+- Web search and Wikipedia integration
+- Image analysis
+- Audio processing
+- Code analysis
+- Data file processing
+
+## Requirements
+
+```bash
+pip install -r requirements.txt
+```
+
+Key dependencies:
+
+- gradio: Web interface
+- langchain & langgraph: AI agent framework
+- openai: Language model integration
+- beautifulsoup4 & html2text: Web scraping
+- pytube & youtube-transcript-api: Video processing
+- whisper: Audio transcription
+- pandas & openpyxl: Data processing
+- Pillow: Image processing
+- PyPDF2 & pymupdf: PDF handling
+
+## License
+
+[](https://opensource.org/licenses/MIT)
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
+## Author
+
+Created by [Vividh Mahajan](https://huggingface.co/Lasdw)
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
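A quick way to confirm the install from the Requirements section succeeded is to import each key dependency. A minimal sketch (the module names below are the usual import-name mappings for these pip packages, e.g. beautifulsoup4 imports as bs4, Pillow as PIL, pymupdf as fitz; adjust if your versions differ):

```python
# Sanity-check the key dependencies listed above by importing each one.
import importlib

for mod in ["gradio", "langchain", "langgraph", "openai", "bs4", "html2text",
            "pytube", "youtube_transcript_api", "whisper", "pandas",
            "openpyxl", "PIL", "PyPDF2", "fitz"]:
    importlib.import_module(mod)  # raises ImportError if the package is missing
    print(f"ok: {mod}")
```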
agent.py
CHANGED
@@ -57,7 +57,7 @@ load_dotenv()
 #webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
 #Give preference to using Tavily Search and Wikipedia Search before using web_search or webpage_scrape. When Web_search does not return a result, use Tavily Search.
 
-SYSTEM_PROMPT = """ You are a genius deep research assistant called
+SYSTEM_PROMPT = """ You are a genius deep research assistant called ScholarAI, made by Vividh Mahajan. Answer the following questions as best you can. If it is a basic question, answer it using your internal knowledge. If it is a complex question that requires facts, use the tools to answer it. DO NOT rely on your internal knowledge unless the tools fail to provide a result:
 For simple questions, you can use your internal knowledge and answer directly. If you do not understand the question, ask for clarification after trying to answer the question yourself.
 
 The way you use the tools is by specifying a json blob. These are the only tools you can use:
@@ -1320,16 +1320,47 @@ def create_agent_graph() -> StateGraph:
     return builder.compile()
 
 # Main agent class that integrates with your existing app.py
-class TurboNerd:
-    def __init__(self, max_iterations=35, apify_api_token=None):
+class ScholarAI:
+    def __init__(self, max_iterations=35, temperature=0.1, max_tokens=2000, model="gpt-4o-mini", apify_api_token=None):
+        # Check for OpenAI API key
+        if not os.getenv("OPENAI_API_KEY"):
+            raise ValueError("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
+
+        try:
+            # Test the API key with a simple request
+            test_llm = ChatOpenAI(
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens
+            )
+            test_llm.invoke("test")  # This will fail if the API key is invalid
+        except Exception as e:
+            error_msg = str(e).lower()
+            if "invalid_api_key" in error_msg or "incorrect_api_key" in error_msg:
+                raise ValueError("Invalid OpenAI API key. Please check your API key and try again.")
+            elif "rate_limit" in error_msg or "quota" in error_msg:
+                raise ValueError("OpenAI API rate limit exceeded or quota reached. Please try again later.")
+            else:
+                raise ValueError(f"Error initializing OpenAI client: {str(e)}")
+
         self.graph = create_agent_graph()
         self.tools = tools_config
         self.max_iterations = max_iterations  # Maximum iterations for the graph
 
+        # Update the global llm instance with the specified parameters
+        global llm
+        llm = ChatOpenAI(
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens
+        )
+
         # Set Apify API token if provided
         if apify_api_token:
             os.environ["APIFY_API_TOKEN"] = apify_api_token
             print("Apify API token set successfully")
+
+        print(f"ScholarAI initialized with model={model}, temperature={temperature}, max_tokens={max_tokens}")
 
     def __call__(self, question: str, attachments: dict = None) -> str:
         """
@@ -1392,7 +1423,7 @@ class TurboNerd:
 
 # Example usage:
 if __name__ == "__main__":
-    agent =
+    agent = ScholarAI(max_iterations=25)
     response = agent("""The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places. TEMPP\excel.xlsx """)
     print("\nFinal Response:")
     print(response)
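The new constructor validates the OpenAI key with a throwaway `test_llm.invoke("test")` call before building the graph, so a bad key fails fast with a readable ValueError instead of partway through a run. A minimal usage sketch based on the signatures in this diff (the attachment format, a dict of filename to base64-encoded content, follows how app.py builds it; the file path is the module's own illustrative example):

```python
# Sketch only: the constructor and __call__ signatures are taken from the diff above.
# Requires a real OPENAI_API_KEY in the environment; __init__ test-invokes the model
# and raises ValueError on a missing or invalid key.
import base64

from agent import ScholarAI

agent = ScholarAI(max_iterations=35, temperature=0.1, max_tokens=2000, model="gpt-4o-mini")

with open("TEMPP/excel.xlsx", "rb") as f:  # illustrative path
    attachments = {"excel.xlsx": base64.b64encode(f.read()).decode("utf-8")}

print(agent("What were the total food sales in this file?", attachments=attachments))
```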
agent_langgraph.py
DELETED
@@ -1,399 +0,0 @@
-import os
-from typing import TypedDict, Annotated
-from langgraph.graph.message import add_messages
-from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
-from langgraph.prebuilt import ToolNode
-from langchain.tools import Tool
-from langgraph.graph import START, END, StateGraph
-from langgraph.prebuilt import tools_condition
-from langchain_openai import ChatOpenAI
-from langchain_community.tools import DuckDuckGoSearchRun
-import getpass
-import subprocess
-import tempfile
-import time
-import random
-
-
-
-def run_python_code(code: str):
-    """Execute Python code in a temporary file and return the output."""
-    # Check for potentially dangerous operations
-    dangerous_operations = [
-        "os.system", "os.popen", "os.unlink", "os.remove",
-        "subprocess.run", "subprocess.call", "subprocess.Popen",
-        "shutil.rmtree", "shutil.move", "shutil.copy",
-        "open(", "file(", "eval(", "exec(",
-        "__import__"
-    ]
-
-    # Safe imports that should be allowed
-    safe_imports = {
-        "import datetime", "import math", "import random",
-        "import statistics", "import collections", "import itertools",
-        "import re", "import json", "import csv"
-    }
-
-    # Check for dangerous operations
-    for dangerous_op in dangerous_operations:
-        if dangerous_op in code:
-            return f"Error: Code contains potentially unsafe operations: {dangerous_op}"
-
-    # Check each line for imports
-    for line in code.splitlines():
-        line = line.strip()
-        if line.startswith("import ") or line.startswith("from "):
-            # Skip if it's in our safe list
-            if any(line.startswith(safe_import) for safe_import in safe_imports):
-                continue
-            return f"Error: Code contains potentially unsafe import: {line}"
-
-    # Add print statements to capture the result
-    # Find the last expression to capture its value
-    lines = code.splitlines()
-    modified_lines = []
-
-    for i, line in enumerate(lines):
-        modified_lines.append(line)
-        # If this is the last line and doesn't have a print statement
-        if i == len(lines) - 1 and not (line.strip().startswith("print(") or line.strip() == ""):
-            # Add a print statement for the last expression
-            if not line.strip().endswith(":"):  # Not a control structure
-                modified_lines.append(f"print('Result:', {line.strip()})")
-
-    modified_code = "\n".join(modified_lines)
-
-    try:
-        # Create a temporary file
-        with tempfile.NamedTemporaryFile(suffix='.py', delete=False) as temp:
-            temp_path = temp.name
-            # Write the code to the file
-            temp.write(modified_code.encode('utf-8'))
-
-        # Run the Python file with restricted permissions
-        result = subprocess.run(
-            ['python', temp_path],
-            capture_output=True,
-            text=True,
-            timeout=10  # Set a timeout to prevent infinite loops
-        )
-
-        # Clean up the temporary file
-        os.unlink(temp_path)
-
-        # Return the output or error
-        if result.returncode == 0:
-            output = result.stdout.strip()
-            # If the output is empty but the code ran successfully
-            if not output:
-                # Try to extract the last line and evaluate it
-                last_line = lines[-1].strip()
-                if not last_line.startswith("print") and not last_line.endswith(":"):
-                    return f"Code executed successfully. The result of the last expression '{last_line}' should be its value."
-                else:
-                    return "Code executed successfully with no output."
-            return output
-        else:
-            return f"Error executing code: {result.stderr}"
-    except subprocess.TimeoutExpired:
-        # Clean up if timeout
-        os.unlink(temp_path)
-        return "Error: Code execution timed out after 10 seconds."
-    except Exception as e:
-        return f"Error executing code: {str(e)}"
-
-# Create the Python code execution tool
-code_tool = Tool(
-    name="python_code",
-    func=run_python_code,
-    description="Execute Python code. Provide the complete Python code as a string. The code will be executed and the output will be returned. Use this for calculations, data processing, or any task that can be solved with Python."
-)
-
-# Custom search function with error handling
-def safe_web_search(query: str) -> str:
-    """Search the web safely with error handling and retry logic."""
-    try:
-        # Use the DuckDuckGoSearchRun tool
-        search_tool = DuckDuckGoSearchRun()
-        result = search_tool.invoke(query)
-
-        # If we get an empty result, provide a fallback
-        if not result or len(result.strip()) < 10:
-            return f"Unable to find specific information about '{query}'. Please try a different search query or check a reliable source like Wikipedia."
-
-        return result
-    except Exception as e:
-        # Add a small random delay to avoid rate limiting
-        time.sleep(random.uniform(1, 2))
-
-        # Return a helpful error message with suggestions
-        error_msg = f"I encountered an issue while searching for '{query}': {str(e)}. "
-        return error_msg
-
-# Create the search tool
-search_tool = Tool(
-    name="web_search",
-    func=safe_web_search,
-    description="Search the web for current information. Provide a specific search query."
-)
-
-# System prompt to guide the model's behavior
-SYSTEM_PROMPT = """You are a genius AI assistant called TurboNerd.
-Always provide accurate and helpful responses based on the information you find. You have tools at your disposal to help, use them whenever you can to improve the accuracy of your responses.
-
-When you receive an input from the user, break it into smaller parts and address each part systematically:
-
-1. For information retrieval (like finding current data, statistics, etc.), use the web_search tool.
-   - If the search fails, don't repeatedly attempt identical searches. Provide the best information you have and be honest about limitations.
-
-2. For calculations, data processing, or computational tasks, use the python_code tool:
-   - Write complete, self-contained Python code
-   - Include print statements for results
-   - Keep code simple and concise
-
-
-Keep your final answer concise and direct, addressing all parts of the user's question clearly. DO NOT include any other text in your response, just the answer.
-"""
-#Your response will be evaluated for accuracy and completeness. After you provide an answer, an evaluator will check your work and may ask you to improve it. The evaluation process has a maximum of 3 attempts.
-
-# Generate the chat interface, including the tools
-llm = ChatOpenAI(
-    model="gpt-4o-mini",
-    temperature=0
-)
-
-chat = llm
-tools = [search_tool, code_tool]
-chat_with_tools = chat.bind_tools(tools)
-
-# Generate the AgentState and Agent graph
-class AgentState(TypedDict):
-    messages: Annotated[list[AnyMessage], add_messages]
-
-def assistant(state: AgentState):
-    # Add system message if it's the first message
-    print("Assistant Called...\n\n")
-    print(f"Assistant state keys: {state.keys()}")
-    print(f"Assistant message count: {len(state['messages'])}")
-
-    if len(state["messages"]) == 1 and isinstance(state["messages"][0], HumanMessage):
-        messages = [SystemMessage(content=SYSTEM_PROMPT)] + state["messages"]
-    else:
-        messages = state["messages"]
-
-    response = chat_with_tools.invoke(messages)
-    print(f"Assistant response type: {type(response)}")
-    if hasattr(response, 'tool_calls') and response.tool_calls:
-        print(f"Tool calls detected: {len(response.tool_calls)}")
-
-    return {
-        "messages": [response],
-    }
-
-# Add evaluator function (commented out)
-"""
-def evaluator(state: AgentState):
-    print("Evaluator Called...\n\n")
-    print(f"Evaluator state keys: {state.keys()}")
-    print(f"Evaluator message count: {len(state['messages'])}")
-
-    # Get the current evaluation attempt count or initialize to 0
-    attempt_count = state.get("evaluation_attempt_count", 0)
-
-    # Create a new evaluator LLM instance
-    evaluator_llm = ChatOpenAI(
-        model="gpt-4o-mini",
-        temperature=0
-    )
-
-    # Create evaluation prompt
-    evaluation_prompt = f\"""You are an evaluator for AI assistant responses. Your job is to:
-
-    1. Check if the answer is complete and accurate
-       - Does it address all parts of the user's question?
-       - Is the information factually correct to the best of your knowledge?
-
-    2. Identify specific improvements needed, if any
-       - Be precise about what needs to be fixed
-
-    3. Return your evaluation in one of these formats:
-       - "ACCEPT: [brief reason]" if the answer is good enough
-       - "IMPROVE: [specific instructions]" if improvements are needed
-
-    This is evaluation attempt {attempt_count + 1} out of 3 maximum attempts.
-
-    Acceptance criteria:
-    - On attempts 1-2: The answer must be complete, accurate, and well-explained
-    - On attempt 3: Accept the answer if it's reasonably correct, even if not perfect
-
-    Available tools the assistant can use:
-    - web_search: For retrieving information from the web
-    - python_code: For executing Python code to perform calculations or data processing
-
-    Be realistic about tool limitations - if a tool is failing repeatedly, don't ask the assistant to keep trying it.
-    \"""
-
-    # Get the last message (the current answer)
-    last_message = state["messages"][-1]
-    print(f"Last message to evaluate: {last_message.content}")
-
-    # Create evaluation message
-    evaluation_message = [
-        SystemMessage(content=evaluation_prompt),
-        HumanMessage(content=f"Evaluate this answer: {last_message.content}")
-    ]
-
-    # Get evaluation
-    evaluation = evaluator_llm.invoke(evaluation_message)
-    print(f"Evaluation result: {evaluation.content}")
-
-    # Create an AIMessage with the evaluation content
-    evaluation_ai_message = AIMessage(content=evaluation.content)
-
-    # Return both the evaluation message and the evaluation result
-    return {
-        "messages": state["messages"] + [evaluation_ai_message],
-        "evaluation_result": evaluation.content,
-        "evaluation_attempt_count": attempt_count + 1
-    }
-"""
-
-# Create the graph
-def create_agent_graph() -> StateGraph:
-    """Create the complete agent graph."""
-    builder = StateGraph(AgentState)
-
-    # Define nodes: these do the work
-    builder.add_node("assistant", assistant)
-    builder.add_node("tools", ToolNode(tools))
-    # builder.add_node("evaluator", evaluator)  # Commented out evaluator
-
-    # Define edges: these determine how the control flow moves
-    builder.add_edge(START, "assistant")
-
-    # First, check if the assistant's output contains tool calls
-    def debug_tools_condition(state):
-        # Check if the last message has tool calls
-        last_message = state["messages"][-1]
-        print(f"Last message type: {type(last_message)}")
-
-        has_tool_calls = False
-        if hasattr(last_message, "tool_calls") and last_message.tool_calls:
-            has_tool_calls = True
-            print(f"Tool calls found: {last_message.tool_calls}")
-
-        result = "tools" if has_tool_calls else None
-        print(f"Tools condition result: {result}")
-        return result
-
-    builder.add_conditional_edges(
-        "assistant",
-        debug_tools_condition,
-        {
-            "tools": "tools",
-            None: END  # Changed from evaluator to END
-        }
-    )
-
-    # Tools always goes back to assistant
-    builder.add_edge("tools", "assistant")
-
-    # Add evaluation edges with attempt counter (commented out)
-    """
-    def evaluation_condition(state: AgentState) -> str:
-        # Print the state keys to debug
-        print(f"Evaluation condition state keys: {state.keys()}")
-
-        # Get the evaluation result from the state
-        evaluation_result = state.get("evaluation_result", "")
-        print(f"Evaluation result: {evaluation_result}")
-
-        # Get the evaluation attempt count or initialize to 0
-        attempt_count = state.get("evaluation_attempt_count", 0)
-
-        # Increment the attempt count
-        attempt_count += 1
-        print(f"Evaluation attempt: {attempt_count}")
-
-        # If we've reached max attempts or evaluation accepts the answer, end
-        if attempt_count >= 3 or evaluation_result.startswith("ACCEPT"):
-            return "end"
-        else:
-            return "assistant"
-
-    builder.add_conditional_edges(
-        "evaluator",
-        evaluation_condition,
-        {
-            "end": END,
-            "assistant": "assistant"
-        }
-    )
-    """
-
-    # Compile with a reasonable recursion limit to prevent infinite loops
-    return builder.compile()
-
-# Main agent class that integrates with your existing app.py
-class TurboNerd:
-    def __init__(self, max_execution_time=30):
-        self.graph = create_agent_graph()
-        self.tools = tools
-        self.max_execution_time = max_execution_time  # Maximum execution time in seconds
-
-    def __call__(self, question: str) -> str:
-        """Process a question and return an answer."""
-        # Initialize the state with the question
-        initial_state = {
-            "messages": [HumanMessage(content=question)],
-        }
-
-        # Run the graph with timeout
-        print(f"Starting graph execution with question: {question}")
-        start_time = time.time()
-
-        try:
-            # Set a reasonable recursion limit
-            result = self.graph.invoke(initial_state, config={"recursion_limit": 10})
-
-            # Print the final state for debugging
-            print(f"Final state keys: {result.keys()}")
-            print(f"Final message count: {len(result['messages'])}")
-
-            # Extract the final message
-            final_message = result["messages"][-1]
-            return final_message.content
-
-        except Exception as e:
-            elapsed_time = time.time() - start_time
-            print(f"Error after {elapsed_time:.2f} seconds: {str(e)}")
-
-            # If we've been running too long, return a timeout message
-            if elapsed_time > self.max_execution_time:
-                return f"""I wasn't able to complete the full analysis within the time limit, but here's what I found:
-
-The population of New York City is approximately 8.8 million (as of the 2020 Census).
-
-For a population doubling at 2% annual growth rate, it would take about 35 years. This can be calculated using the Rule of 70, which states that dividing 70 by the growth rate gives the approximate doubling time:
-
-70 ÷ 2 = 35 years
-
-You can verify this with a Python calculation:
-```python
-years = 0
-population = 1
-while population < 2:
-    population *= 1.02  # 2% growth
-    years += 1
-print(years)  # Result: 35
-```"""
-
-            # Otherwise return the error
-            return f"I encountered an error while processing your question: {str(e)}"
-
-# Example usage:
-if __name__ == "__main__":
-    agent = TurboNerd(max_execution_time=30)
-    response = agent("What is the population of New York City? Then write a Python program to calculate how many years it would take for the population to double at a 2% annual growth rate.")
-    print("\nFinal Response:")
-    print(response)
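For reference, the deleted module's core was a two-node loop: the assistant node calls the LLM, a conditional edge routes to the tool node whenever the last message carries tool calls, and the tool node always routes back to the assistant. A self-contained sketch of that wiring (the echo-style assistant and no-op tools node here are stand-ins so the sketch runs without an API key; the StateGraph calls are the same ones the deleted file used):

```python
from typing import TypedDict, Annotated
from langgraph.graph import START, END, StateGraph
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, AIMessage, HumanMessage

class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]

def assistant(state: AgentState):
    # Stand-in for chat_with_tools.invoke(...); returns a reply with no tool calls.
    return {"messages": [AIMessage(content="done")]}

def tools_node(state: AgentState):
    # Stand-in for ToolNode(tools); never reached with the stub assistant above.
    return {"messages": []}

def route(state: AgentState):
    last = state["messages"][-1]
    return "tools" if getattr(last, "tool_calls", None) else None

builder = StateGraph(AgentState)
builder.add_node("assistant", assistant)
builder.add_node("tools", tools_node)
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", route, {"tools": "tools", None: END})
builder.add_edge("tools", "assistant")
graph = builder.compile()

print(graph.invoke({"messages": [HumanMessage(content="hi")]})["messages"][-1].content)
```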
app.py
CHANGED
@@ -4,13 +4,25 @@ import requests
 import inspect
 import pandas as pd
 import base64
-from agent import
+from agent import ScholarAI
 from rate_limiter import QueryRateLimiter
 from flask import request
+import PyPDF2
+import fitz  # PyMuPDF
+import time
+from typing import List, Tuple, Optional
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import HumanMessage, AIMessage
+
+# Load custom CSS
+with open("static/custom.css", "r", encoding="utf-8") as f:
+    custom_css = f.read()
 
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 ALLOWED_FILE_EXTENSIONS = [".mp3", ".xlsx", ".py", ".png", ".jpg", ".jpeg", ".gif", ".txt", ".md", ".json", ".csv", ".yml", ".yaml", ".html", ".css", ".js"]
+MAX_FILE_SIZE_MB = 10
+CHUNK_SIZE = 1000  # characters per chunk for text processing
 
 # Initialize rate limiter (5 queries per hour)
 query_limiter = QueryRateLimiter(max_queries_per_hour=5)
@@ -18,17 +30,10 @@ query_limiter = QueryRateLimiter(max_queries_per_hour=5)
 # Dictionary to store session-specific conversation histories
 session_histories = {}
 
-# ---
-
-
-
-        self.agent = TurboNerd()
-
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        answer = self.agent(question)
-        print(f"Agent returning answer: {answer[:50]}...")
-        return answer
+# --- Model Settings ---
+DEFAULT_TEMPERATURE = 0.1
+DEFAULT_MAX_TOKENS = 2000
+DEFAULT_MODEL = "gpt-4o-mini"
 
 # --- Chat Interface Functions ---
 def format_history_for_agent(history: list) -> str:
@@ -47,11 +52,90 @@ def format_history_for_agent(history: list) -> str:
 
     return "\n".join(formatted_history)
 
+def validate_inputs(question: str, file_uploads: List[gr.File]) -> Tuple[bool, str]:
+    """Validate user inputs before processing."""
+    if not question.strip() and (not file_uploads or len(file_uploads) == 0):
+        return False, "Please enter a question or upload a file."
+
+    if len(question) > 2000:
+        return False, "Question is too long. Please keep it under 2000 characters."
+
+    if file_uploads:
+        for file in file_uploads:
+            if file is None:
+                continue
+
+            file_path = file.name
+            if not os.path.exists(file_path):
+                return False, f"File {os.path.basename(file_path)} not found."
+
+            file_size = os.path.getsize(file_path) / (1024 * 1024)  # Convert to MB
+
+            if file_size > MAX_FILE_SIZE_MB:
+                return False, f"File {os.path.basename(file_path)} is too large. Maximum size is {MAX_FILE_SIZE_MB}MB."
+
+            file_ext = os.path.splitext(file_path)[1].lower()
+            if file_ext not in ALLOWED_FILE_EXTENSIONS:
+                return False, f"File {os.path.basename(file_path)} has an unsupported format. Allowed formats: {', '.join(ALLOWED_FILE_EXTENSIONS)}"
+
+    return True, ""
+
+def process_document(file_path: str, progress=gr.Progress()) -> List[str]:
+    """Process document and return chunks with progress bar."""
+    file_ext = os.path.splitext(file_path)[1].lower()
+    chunks = []
+
+    try:
+        if file_ext == '.pdf':
+            # Process PDF
+            doc = fitz.open(file_path)
+            total_pages = len(doc)
+
+            for page_num in progress.tqdm(range(total_pages), desc="Processing PDF pages"):
+                page = doc[page_num]
+                text = page.get_text()
+                # Split text into chunks
+                for i in range(0, len(text), CHUNK_SIZE):
+                    chunk = text[i:i + CHUNK_SIZE]
+                    if chunk.strip():
+                        chunks.append(f"[Page {page_num + 1}] {chunk}")
+                time.sleep(0.1)  # Small delay to show progress
+
+        elif file_ext in ['.txt', '.md', '.json', '.csv', '.yml', '.yaml', '.html', '.css', '.js', '.py']:
+            # Process text files
+            with open(file_path, 'r', encoding='utf-8') as f:
+                text = f.read()
+            total_chunks = len(text) // CHUNK_SIZE + (1 if len(text) % CHUNK_SIZE else 0)
+
+            for i in progress.tqdm(range(0, len(text), CHUNK_SIZE), desc="Processing text chunks"):
+                chunk = text[i:i + CHUNK_SIZE]
+                if chunk.strip():
+                    chunks.append(chunk)
+                time.sleep(0.1)  # Small delay to show progress
+
+        elif file_ext in ['.xlsx']:
+            # Process Excel files
+            df = pd.read_excel(file_path)
+            total_rows = len(df)
+
+            for i in progress.tqdm(range(0, total_rows, CHUNK_SIZE), desc="Processing Excel rows"):
+                chunk_df = df.iloc[i:i + CHUNK_SIZE]
+                chunks.append(chunk_df.to_string())
+                time.sleep(0.1)  # Small delay to show progress
+
+        return chunks
+
+    except Exception as e:
+        return [f"Error processing file: {str(e)}"]
+
-def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
+def chat_with_agent(question: str, file_uploads, history: list, temperature: float, max_tokens: int, model: str, progress=gr.Progress()) -> tuple:
     """
-    Handle chat interaction with
+    Handle chat interaction with ScholarAI agent, now with file upload support and input validation.
     """
-
+    # Validate inputs
+    is_valid, error_message = validate_inputs(question, file_uploads)
+    if not is_valid:
+        history.append({"role": "assistant", "content": f"❌ {error_message}"})
         return history, ""
 
     try:
@@ -62,7 +146,6 @@ def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
         # Initialize or get session history
         if session_id not in session_histories:
             session_histories[session_id] = []
-        # If we have existing history, add it to the session history
         if history:
             session_histories[session_id].extend(history)
 
@@ -70,8 +153,30 @@ def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
         history.append({"role": "user", "content": question})
         session_histories[session_id].append({"role": "user", "content": question})
 
-
-
+        try:
+            # Initialize agent with current settings
+            agent = ScholarAI(
+                max_iterations=35,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                model=model
+            )
+            print("Agent initialized successfully with Temperature: ", temperature, "Max Tokens: ", max_tokens, "Model: ", model)
+        except ValueError as e:
+            error_message = str(e)
+            if "API key not found" in error_message:
+                error_message = "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable."
+            elif "Invalid OpenAI API key" in error_message:
+                error_message = "Invalid OpenAI API key. Please check your API key and try again."
+            elif "rate limit" in error_message.lower() or "quota" in error_message.lower():
+                error_message = "OpenAI API rate limit exceeded or quota reached. Please try again later."
+            else:
+                error_message = f"Error initializing AI agent: {error_message}"
+
+            history.append({"role": "assistant", "content": error_message})
+            if session_id in session_histories:
+                session_histories[session_id].append({"role": "assistant", "content": error_message})
+            return history, ""
 
         # Process uploaded files if any
         attachments = {}
@@ -82,16 +187,26 @@ def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
             if file is not None:
                 file_path = file.name
                 file_name = os.path.basename(file_path)
-                file_ext = os.path.splitext(file_name)[1].lower()
 
-                #
-
+                # Process document and get chunks
+                chunks = process_document(file_path, progress)
+
+                if len(chunks) > 1:
+                    file_info += f"\nProcessing {file_name} in {len(chunks)} chunks..."
+
+                    # Process each chunk
+                    for i, chunk in enumerate(chunks, 1):
+                        chunk_name = f"{file_name}_chunk_{i}"
+                        chunk_content = base64.b64encode(chunk.encode('utf-8')).decode('utf-8')
+                        attachments[chunk_name] = chunk_content
+                        file_info += f"\nProcessed chunk {i}/{len(chunks)}"
+                else:
+                    # Single chunk or error
                     with open(file_path, "rb") as f:
                         file_content = f.read()
-                    file_content_b64 = base64.b64encode(file_content).decode(
+                    file_content_b64 = base64.b64encode(file_content).decode('utf-8')
                     attachments[file_name] = file_content_b64
-                    file_info += f"\nUploaded file: {
+                    file_info += f"\nUploaded file: {file_name}"
 
         if file_info:
             if question.strip():
@@ -101,7 +216,6 @@ def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
 
         # Format the session-specific conversation history
         conversation_history = format_history_for_agent(session_histories[session_id])
-        print(f"Current conversation history:\n{conversation_history}")  # Debug print
 
         # Prepare the full context for the agent
         full_context = f"Question: {question}\n\nConversation History:\n{conversation_history}"
@@ -115,25 +229,23 @@ def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
         # Format the response to show thought process
         formatted_response = ""
         if "Thought:" in response:
-            # Split the response into sections
             sections = response.split("\n\n")
             for section in sections:
                 if section.startswith("Thought:"):
-                    formatted_response += f"
+                    formatted_response += f"{section[7:].strip()}\n\n"
                 elif section.startswith("Action:"):
-                    # Extract the tool being used
                     if "action" in section and "action_input" in section:
                         try:
                             import json
                             action_json = json.loads(section.split("```json")[1].split("```")[0].strip())
                             tool_name = action_json.get("action", "").replace("_", " ").title()
-                            formatted_response += f"
+                            formatted_response += f"Using {tool_name}...\n\n"
                         except:
-                            formatted_response += f"
+                            formatted_response += f"{section[7:].strip()}\n\n"
                 elif section.startswith("Observation:"):
-                    formatted_response += f"
+                    formatted_response += f"{section[11:].strip()}\n\n"
                 elif section.startswith("Final Answer:"):
-                    formatted_response += f"
+                    formatted_response += f"{section[12:].strip()}\n\n"
                 else:
                     formatted_response += f"{section}\n\n"
         else:
@@ -144,23 +256,15 @@ def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
             session_histories[session_id].append({"role": "assistant", "content": formatted_response})
 
         return history, ""
-
-        error_message = (
-            "I apologize, but I've reached my thinking limit while trying to answer your question. "
-            "This usually happens when the question requires too many steps to solve and therefore too much money. "
-            "Could you please try breaking down your question into smaller, more specific parts? "
-            "For example, instead of asking about multiple things at once, try asking about one aspect at a time."
-        )
-        history.append({"role": "assistant", "content": error_message})
-        if session_id in session_histories:
-            session_histories[session_id].append({"role": "assistant", "content": error_message})
-        return history, ""
+
     except Exception as e:
         error_str = str(e).lower()
-        if "credit" in error_str or "quota" in error_str or "limit" in error_str or "exceeded" in error_str
-        error_message =
-
+        if "credit" in error_str or "quota" in error_str or "limit" in error_str or "exceeded" in error_str:
+            error_message = "It seems I've run out of API credits. Please try again later or tomorrow when the credits reset."
+        elif "invalid_api_key" in error_str or "incorrect_api_key" in error_str:
+            error_message = "Invalid OpenAI API key. Please check your API key and try again."
+        elif "api_key" in error_str:
+            error_message = "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable."
         else:
            error_message = f"Error: {str(e)}"
 
@@ -176,7 +280,7 @@ def clear_chat():
 # --- Evaluation Functions ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the
+    Fetches all questions, runs the ScholarAI on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
@@ -193,9 +297,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-    # 1. Instantiate Agent
+    # 1. Instantiate Agent
     try:
-        agent =
+        agent = ScholarAI(
+            max_iterations=35,
+            temperature=DEFAULT_TEMPERATURE,
+            max_tokens=DEFAULT_MAX_TOKENS,
+            model=DEFAULT_MODEL
+        )
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -312,133 +421,143 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     return status_message, results_df
 
 # --- Build Gradio Interface using Blocks with Tabs ---
-with gr.Blocks(title="
-    gr.
-
-    # Tab 1: Chat Interface
-    with gr.TabItem("🤓", id="chat"):
-        gr.Markdown("""
-        with gr.Row():
-        with gr.Row():
-            file_types=ALLOWED_FILE_EXTENSIONS,
-            file_count="multiple",
-            scale=1
-        )
-        with gr.Row():
-            submit_btn = gr.Button("Send", variant="primary")
-
-        # Chat interface event handlers
-        submit_btn.click(
-            fn=chat_with_agent,
-            inputs=[question_input, file_upload, chatbot],
-            outputs=[chatbot, question_input]
-        )
-
-        question_input.submit(
-            fn=chat_with_agent,
-            inputs=[question_input, file_upload, chatbot],
-            outputs=[chatbot, question_input]
-        )
-
-    #
-    with gr.
-        gr.Markdown(""
+with gr.Blocks(title="ScholarAI Agent", css=custom_css) as demo:
+    with gr.Row(elem_classes="header-bar"):
+        with gr.Column(scale=3):
+            gr.Markdown("# <span style='font-size: 1.8em'>ScholarAI</span>", elem_classes="title")
+            gr.Markdown("""
+            <div class="badges">
+                <img src="https://img.shields.io/badge/build-passing-brightgreen" alt="Build Status">
+                <img src="https://img.shields.io/badge/License-MIT-yellow" alt="License">
+                <img src="https://img.shields.io/badge/version-1.0.0-blue" alt="Version">
+                <img src="https://img.shields.io/badge/python-3.11-blue" alt="Python">
+                <img src="https://img.shields.io/badge/gradio-5.29.1-orange" alt="Gradio">
+            </div>
+            """, elem_classes="badges-container")
+        with gr.Column(scale=1):
+            gr.Markdown("<span style='font-size: 0.9em'>by [Vividh Mahajan](https://huggingface.co/Lasdw)</span>", elem_classes="author")
+
+    gr.Markdown("""
+    ## ScholarAI helps you find answers by searching the web, analyzing images, processing audio, and more.
+
+    ### Tip: Ask specific, factual questions for best results. Some websites may be restricted.
+    """)
+
+    with gr.Accordion("Example Questions", open=False, elem_classes="example-questions"):
+        gr.Markdown("""
+        ### Example Questions:
+
+        **Research & Analysis:**
+        - "Find the first name of the only Malko Competition recipient from the 20th century (after 1977) whose nationality on record is a country that no longer exists. Tell me their current age and where they are from."
+        - "Analyze this image of a mathematical equation, and find academic papers that use this equation."
+
+        **Multi-Modal Analysis:**
+        - "I have an interview recording and a transcript. Compare the audio transcription with the provided transcript and identify any discrepancies."
+        - "This image shows a historical document. Find me the historical events from that era."
+
+        **Code & Data Processing:**
+        - "I have a Python script and an Excel file with data. Analyze the code's functionality and suggest improvements based on the data patterns."
+        - "This code contains a bug. Debug it."
+
+        The agent can handle multiple file uploads and combine information from various sources to provide comprehensive answers. Try asking complex questions that require multiple tools working together!
+        """)
+
+    with gr.Row():
+        # Left panel - Chat interface
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(
+                height=250,
+                type="messages"
+            )
+            with gr.Row():
+                question_input = gr.Textbox(
+                    label="Ask a question",
+                    placeholder="e.g. Analyze this interview transcript and find discrepancies",
+                    lines=5,
+                    max_lines=5,
+                    container=True,
+                    scale=2,
+                    min_width=500
+                )
+                with gr.Column(scale=1):
+                    with gr.Row():
+                        with gr.Column(scale=2):
+                            file_upload = gr.File(
+                                label="Upload Files (.png, .txt, .mp3, .xlsx, .py)",
+                                file_types=ALLOWED_FILE_EXTENSIONS,
+                                file_count="multiple",
+                                height=175,
+                                min_width=200
+                            )
+                    with gr.Row():
+                        submit_btn = gr.Button("Start Research", variant="primary")
+
+        # Right panel - Controls
+        with gr.Column(scale=1):
+            gr.Markdown("# Model Settings")
+            with gr.Group():
+                temperature = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=DEFAULT_TEMPERATURE,
+                    step=0.1,
+                    label="Temperature",
+                    info="Higher values make the output more random, lower values make it more deterministic"
+                )
+                max_tokens = gr.Slider(
+                    minimum=100,
+                    maximum=4000,
+                    value=DEFAULT_MAX_TOKENS,
+                    step=100,
+                    label="Max Tokens",
+                    info="Maximum length of the response"
+                )
+                model = gr.Dropdown(
+                    choices=["gpt-4o-mini", "gpt-3.5-turbo"],
+                    value=DEFAULT_MODEL,
+                    label="Model",
+                    info="The language model to use"
+                )
+
+    # Footer with disclaimer
+    gr.Markdown("""
+    <div class="footer">
+    This tool is designed for educational and research purposes only. It is not intended for malicious use.
+    </div>
+    """)
+
+    # Chat interface event handlers
+    submit_btn.click(
+        fn=chat_with_agent,
+        inputs=[question_input, file_upload, chatbot, temperature, max_tokens, model],
+        outputs=[chatbot, question_input]
+    )
+
+    question_input.submit(
+        fn=chat_with_agent,
+        inputs=[question_input, file_upload, chatbot, temperature, max_tokens, model],
+        outputs=[chatbot, question_input]
+    )
 
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
+    space_id_startup = os.getenv("SPACE_ID")
 
     if space_host_startup:
-        print(f"
+        print(f"SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
-        print("
+        print("SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup:
-        print(f"
+    if space_id_startup:
+        print(f"SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("
+        print("SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
     print("-"*(60 + len(" App Starting ")) + "\n")
 
-    print("Launching Gradio Interface for
+    print("Launching Gradio Interface for ScholarAI Agent...")
     demo.launch(debug=True, share=False, show_api=False, favicon_path="static/favicon.ico", enable_monitoring=True)
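The heart of the new upload path is process_document's fixed-window chunking: every CHUNK_SIZE characters become one chunk, empty windows are dropped, and PDF chunks are tagged with their page number before being base64-encoded into the attachments dict. A standalone sketch of just that scheme:

```python
# Standalone sketch of the chunking used by process_document above
# (fixed 1000-character windows; the page tag only applies to PDF text).
CHUNK_SIZE = 1000

def chunk_text(text: str, page_num=None) -> list:
    chunks = []
    for i in range(0, len(text), CHUNK_SIZE):
        chunk = text[i:i + CHUNK_SIZE]
        if chunk.strip():  # drop whitespace-only windows
            prefix = f"[Page {page_num}] " if page_num is not None else ""
            chunks.append(prefix + chunk)
    return chunks

print(len(chunk_text("x" * 2500)))         # 3 windows: 1000 + 1000 + 500 chars
print(chunk_text("hello", page_num=2)[0])  # "[Page 2] hello"
```

One design note: chat_with_agent re-instantiates ScholarAI (and therefore re-runs the test invoke) on every message, which costs one extra API round-trip per chat turn.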
static/custom.css
ADDED
@@ -0,0 +1,478 @@
:root {
    --primary-purple: #8a3db8;
    --dark-purple: #6a2c9e;
    --light-purple: #b366ff;
    --black: #1a1a1a;
    --dark-gray: #2d2d2d;
    --light-gray: #404040;
    --white: #ffffff;
}

/* Header styling */
h1 {
    font-size: 2.5em !important;
    margin-bottom: 0.5em !important;
}

h1 span {
    color: var(--light-purple) !important;
    font-weight: bold !important;
}

/* Global styles */
.gradio-container {
    background-color: var(--black) !important;
    color: var(--white) !important;
    padding-bottom: 60px !important;
}

/* Chat interface */
.chatbot {
    background-color: var(--dark-gray) !important;
    border-radius: 8px !important;
    max-width: 520px !important; /* Limit width of the whole chat area */
    margin-left: auto !important;
    margin-right: auto !important;
}

/* Message bubbles */
.message, .user-message, .assistant-message {
    border: none !important;
    border-radius: 10px !important;
    box-shadow: none !important;
    background: var(--light-purple) !important;
    color: var(--white) !important;
    margin: 8px 0 !important;
    padding: 12px 18px !important;
    max-width: 85% !important; /* Limit width of each message bubble */
    word-break: break-word !important;
}

.user-message {
    background-color: var(--light-purple) !important;
    color: var(--white) !important;
}

.assistant-message {
    background-color: var(--light-gray) !important;
    color: var(--white) !important;
}

/* Input area */
.textbox {
    background-color: var(--dark-gray) !important;
    color: var(--white) !important;
}

.textbox:focus {
    border-color: var(--light-purple) !important;
    box-shadow: 0 0 5px var(--light-purple) !important;
}

/* Buttons */
button {
    background-color: var(--light-purple) !important;
    color: var(--white) !important;
    border: none !important;
    border-radius: 4px !important;
    transition: background-color 0.3s ease !important;
}

button:hover {
    background-color: var(--primary-purple) !important;
}

/* Slider styling */
input[type="range"],
.gr-form,
.gr-box,
.gr-input {
    -webkit-appearance: none !important;
    background: var(--light-purple) !important;
    height: 4px !important;
    border-radius: 2px !important;
}

input[type="range"]::-webkit-slider-thumb,
.gr-form::-webkit-slider-thumb,
.gr-box::-webkit-slider-thumb,
.gr-input::-webkit-slider-thumb {
    -webkit-appearance: none !important;
    width: 16px !important;
    height: 16px !important;
    background: var(--light-purple) !important;
    border: 2px solid var(--white) !important;
    border-radius: 50% !important;
    cursor: pointer !important;
}

input[type="range"]::-moz-range-thumb,
.gr-form::-moz-range-thumb,
.gr-box::-moz-range-thumb,
.gr-input::-moz-range-thumb {
    width: 16px !important;
    height: 16px !important;
    background: var(--light-purple) !important;
    border: 2px solid var(--white) !important;
    border-radius: 50% !important;
    cursor: pointer !important;
}

input[type="range"]::-webkit-slider-runnable-track,
.gr-form::-webkit-slider-runnable-track,
.gr-box::-webkit-slider-runnable-track,
.gr-input::-webkit-slider-runnable-track {
    background: var(--light-purple) !important;
    height: 4px !important;
    border-radius: 2px !important;
}

input[type="range"]::-moz-range-track,
.gr-form::-moz-range-track,
.gr-box::-moz-range-track,
.gr-input::-moz-range-track {
    background: var(--light-purple) !important;
    height: 4px !important;
    border-radius: 2px !important;
}

/* Dropdown styling */
select, .dropdown {
    background-color: var(--dark-gray) !important;
    color: var(--white) !important;
    border: 1px solid var(--light-purple) !important;
    border-radius: 4px !important;
}

select:hover, .dropdown:hover {
    border-color: var(--light-purple) !important;
}

select:focus, .dropdown:focus {
    border-color: var(--light-purple) !important;
    box-shadow: 0 0 5px var(--light-purple) !important;
}

/* Markdown content */
.markdown {
    color: var(--white) !important;
}

.markdown h1, .markdown h2, .markdown h3 {
    color: var(--light-purple) !important;
}

/* File upload area */
.upload-area {
    background-color: var(--dark-gray) !important;
    border: 2px dashed var(--light-purple) !important;
    border-radius: 8px !important;
}

.upload-area:hover {
    border-color: var(--light-purple) !important;
}

/* Scrollbars */
::-webkit-scrollbar {
    width: 8px;
    height: 8px;
}

::-webkit-scrollbar-track {
    background: var(--dark-gray);
}

::-webkit-scrollbar-thumb {
    background: var(--light-purple);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: var(--primary-purple);
}

/* Footer styling */
.footer {
    position: fixed;
    bottom: 0;
    left: 0;
    right: 0;
    background-color: var(--black) !important;
    color: var(--white) !important;
    padding: 10px 20px !important;
    text-align: center !important;
    border-top: 1px solid var(--light-purple) !important;
    font-size: 1em !important;
    z-index: 1000 !important;
}

/* Accordion styling */
.accordion {
    background-color: var(--dark-gray) !important;
    border: 1px solid var(--light-purple) !important;
    border-radius: 8px !important;
    margin: 10px 0 !important;
}

.accordion-header {
    background-color: var(--dark-gray) !important;
    color: var(--white) !important;
    padding: 12px !important;
    cursor: pointer !important;
    transition: background-color 0.3s ease !important;
}

.accordion-header:hover {
    background-color: var(--light-gray) !important;
}

.accordion-content {
    background-color: var(--dark-gray) !important;
    color: var(--white) !important;
    padding: 15px !important;
    border-top: 1px solid var(--light-purple) !important;
}

/* Accordion icon */
.accordion-header::after {
    color: var(--light-purple) !important;
}

/* Model Settings styling */
.slider,
.gr-slider,
.gr-form {
    background-color: var(--black) !important;
}

.slider .thumb,
.gr-slider .thumb,
.gr-form .thumb {
    background-color: var(--light-purple) !important;
    border: 2px solid var(--white) !important;
}

.slider .track,
.gr-slider .track,
.gr-form .track {
    background-color: var(--dark-purple) !important;
}

.slider .track-fill,
.gr-slider .track-fill,
.gr-form .track-fill {
    background-color: var(--light-purple) !important;
}

.slider .label,
.gr-slider .label,
.gr-form .label {
    color: var(--white) !important;
}

.slider .info,
.gr-slider .info,
.gr-form .info,
.gr-box .info,
.gr-input .info {
    color: var(--black) !important;
    opacity: 1 !important;
}

/* Additional Gradio-specific overrides */
.gr-box[data-testid="slider"] .track {
    background-color: var(--dark-purple) !important;
}

.gr-box[data-testid="slider"] .info {
    color: var(--black) !important;
    opacity: 1 !important;
}

.gr-box[data-testid="slider"] .track-fill {
    background-color: var(--light-purple) !important;
}

/* Box below model settings */
.gr-box,
.gr-box[data-testid="box"],
.gr-box[data-testid="group"] {
    background-color: var(--light-purple) !important;
    border-radius: 8px !important;
    padding: 15px !important;
    margin-top: 10px !important;
}

.gr-box *,
.gr-box[data-testid="box"] *,
.gr-box[data-testid="group"] * {
    color: var(--white) !important;
}

.gr-box .markdown,
.gr-box[data-testid="box"] .markdown,
.gr-box[data-testid="group"] .markdown {
    color: var(--white) !important;
    background-color: transparent !important;
}

.gr-box .markdown p,
.gr-box[data-testid="box"] .markdown p,
.gr-box[data-testid="group"] .markdown p {
    color: var(--white) !important;
    margin: 0 !important;
    background-color: transparent !important;
}

/* Override any conflicting styles */
.gr-box[data-testid="box"] .column,
.gr-box[data-testid="group"] .column {
    background-color: transparent !important;
}

.gr-box[data-testid="box"] .column .markdown,
.gr-box[data-testid="group"] .column .markdown {
    background-color: transparent !important;
}

/* Info text styling */
.info {
    color: var(--light-purple) !important;
    font-size: 0.9em !important;
    font-style: italic !important;
}

/* Side panel styling */
.column {
    background-color: var(--black) !important;
}

.column .markdown {
    background-color: var(--light-purple) !important;
    border: none !important;
}

.column .markdown p {
    background-color: var(--light-purple) !important;
    border: none !important;
}

.header-bar {
    padding: 5px 10px !important;
    margin-bottom: 10px !important;
    background: transparent !important;
}

.title {
    text-align: left !important;
    margin: 0 !important;
    padding: 0 !important;
}

.author {
    text-align: right !important;
    margin: 0 !important;
    padding: 0 !important;
    line-height: 2.5em !important;
}

/* Example Questions styling */
.example-questions {
    margin: 20px 0 !important;
    border: 2px solid var(--light-purple) !important;
    border-radius: 8px !important;
    background: linear-gradient(45deg, var(--primary-purple), var(--light-purple)) !important;
    transition: all 0.3s ease !important;
}

.example-questions:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 4px 15px rgba(156, 77, 204, 0.3) !important;
}

.example-questions .accordion-header {
    background: transparent !important;
    color: var(--white) !important;
    font-size: 1.2em !important;
    font-weight: bold !important;
    padding: 15px 20px !important;
    cursor: pointer !important;
    display: flex !important;
    align-items: center !important;
    gap: 10px !important;
}

.example-questions .accordion-header::after {
    content: "Click to expand" !important;
    font-size: 0.8em !important;
    opacity: 0.8 !important;
    margin-left: auto !important;
}

.example-questions .accordion-content {
    background-color: var(--dark-gray) !important;
    padding: 20px !important;
    border-top: 1px solid var(--light-purple) !important;
}

.example-questions .markdown {
    color: var(--white) !important;
}

.example-questions .markdown h3 {
    color: var(--light-purple) !important;
    margin-bottom: 15px !important;
}

.example-questions .markdown strong {
    color: var(--light-purple) !important;
}

/* Badges styling */
.badges-container {
    margin-top: 5px !important;
}

.badges {
    display: flex !important;
    gap: 8px !important;
    flex-wrap: wrap !important;
    align-items: center !important;
}

.badges img {
    height: 20px !important;
    transition: transform 0.2s ease !important;
}

.badges img:hover {
    transform: translateY(-2px) !important;
}

/* Remove all blockquote and code block vertical bars and backgrounds */
.markdown blockquote {
    border-left: none !important;
    background: none !important;
    margin: 0 !important;
    padding: 0 !important;
    color: var(--white) !important;
    box-shadow: none !important;
}

.markdown pre,
.markdown code {
    border: none !important;
    background: var(--dark-gray) !important;
    box-shadow: none !important;
    margin: 0 !important;
    color: var(--white) !important;
}

/* Force remove any left border, box-shadow, or background from all markdown descendants */
.markdown * {
    border-left: none !important;
    box-shadow: none !important;
    background: none !important;
    color: var(--white) !important;
}
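A stylesheet on disk does nothing until the app hands it to Gradio. A minimal sketch of the likely wiring is below; the actual call lives in app.py and is not shown in this commit view, so the variable names here are assumptions.

```python
import gradio as gr

# Sketch: load the new stylesheet and pass it to Blocks. Gradio inlines the
# string, so the .footer, .chatbot, etc. selectors above apply to the rendered UI.
with open("static/custom.css") as f:
    custom_css = f.read()

with gr.Blocks(css=custom_css) as demo:
    # The h1/span rules above would color the "AI" part of the title purple.
    gr.Markdown("<h1>Scholar<span>AI</span></h1>")
```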
tools.py
CHANGED
@@ -485,7 +485,7 @@ def wikipedia_search(query: str, num_results: int = 3) -> str:
             formatted_results += f"{i}. {title}\n"
             formatted_results += f"   URL: {source}\n"
             formatted_results += f"   {content}\n\n"
-
+        print("formatted_results:", formatted_results[:100])
         return formatted_results
 
     except Exception as e:
@@ -550,7 +550,7 @@ def tavily_search(query: str, search_depth: str = "basic") -> str:
         # Otherwise, just convert to string representation
         else:
             formatted_results += str(results)
-
+        print("formatted_results:", formatted_results[:100])
         return formatted_results
 
     except Exception as e:
@@ -617,7 +617,9 @@ def arxiv_search(query: str, max_results: int = 5) -> str:
             if len(abstract) > 300:
                 abstract = abstract[:300] + "..."
             formatted_results += f"   Abstract: {abstract}\n\n"
-
+
+        print("formatted_results:", formatted_results[:100])
+
         return formatted_results
 
     except Exception as e:
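All three hunks paste the same truncated debug print into wikipedia_search, tavily_search, and arxiv_search. If that duplication grows, one option is a small shared helper; the sketch below assumes a hypothetical DEBUG_TOOLS environment flag that is not part of the repository.

```python
import os

# Hypothetical helper: DEBUG_TOOLS is an assumed env flag, not part of tools.py.
DEBUG_TOOLS = os.getenv("DEBUG_TOOLS", "0") == "1"

def debug_preview(text: str, limit: int = 100) -> None:
    """Print the first `limit` characters of a tool's formatted results."""
    if DEBUG_TOOLS:
        print("formatted_results:", text[:limit])
```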