#!/usr/bin/env python3
"""Enhanced GAIA Agent Interface - Full API Integration.

Gradio interface for the GAIA benchmark: fetches questions through the agent,
runs the agent on them, and submits the collected answers for scoring.
"""

import os
import gradio as gr
import json
from datetime import datetime
from gaia_agent import GAIAAgent

# Score (in percent) the progress view treats as the certification threshold.
CERTIFICATION_TARGET = 30


class GAIAInterface:
    """Session state and UI callbacks that sit between Gradio and ``GAIAAgent``.

    Attributes are plain lists kept for the lifetime of the process:
    ``current_questions`` (last fetched batch), ``answered_questions``
    (answers queued for submission, one record per task id) and
    ``score_history`` (one record per successful submission).
    """

    def __init__(self):
        self.agent = GAIAAgent()
        self.current_questions = []
        self.answered_questions = []
        self.score_history = []

    def fetch_questions(self) -> str:
        """Fetch the question list via the agent and return a status message.

        On success the fetched questions are stored in ``current_questions``.
        Any exception is converted into an error string because the result is
        displayed directly in the UI.
        """
        try:
            questions = self.agent.get_questions()
        except Exception as e:
            return f"āŒ Error fetching questions: {e}"

        if not questions:
            return "āŒ Failed to fetch questions from GAIA API"

        self.current_questions = questions
        return f"āœ… Fetched {len(questions)} questions from GAIA API"

    def get_random_question(self):
        """Fetch one random question via the agent.

        Returns:
            A ``(info_markdown, task_id, question)`` tuple. On failure the
            first element carries the error message and the other two are
            empty strings.
        """
        try:
            question_data = self.agent.get_random_question()
            if not question_data:
                return "āŒ Failed to fetch random question", "", ""

            task_id = question_data.get('task_id', 'unknown')
            question = question_data.get('Question', 'No question found')
            level = question_data.get('Level', 'Unknown')
            files = question_data.get('file_name')

            info = f"šŸ“‹ **Task ID:** {task_id}\n"
            info += f"šŸŽÆ **Level:** {level}\n"
            if files:
                info += f"šŸ“ **Associated Files:** {files}\n"
            info += f"ā“ **Question:** {question}"

            return info, task_id, question
        except Exception as e:
            return f"āŒ Error: {e}", "", ""

    def process_question_with_files(self, question, task_id=None) -> str:
        """Run the agent on ``question`` and return a formatted answer.

        Args:
            question: Question text; ``None`` or blank text yields a prompt
                message instead of calling the agent.
            task_id: Optional task id, forwarded to ``agent.query``. When it
                is truthy the cleaned answer is also queued for submission.

        Returns:
            Markdown text with the cleaned answer and the last five entries
            of ``agent.reasoning_memory``, or an error message.
        """
        # A cleared Gradio textbox can deliver None as well as "".
        if not question or not question.strip():
            return "Please enter a question or fetch one from GAIA API."

        try:
            answer = self.agent.query(question, task_id=task_id, max_steps=15)
            clean_answer = self.agent.clean_for_api_submission(answer)

            if task_id:
                # Re-processing a task must supersede its earlier answer;
                # otherwise the submission would carry duplicate task ids and
                # the answered-question count would be inflated.
                self.answered_questions = [
                    item for item in self.answered_questions
                    if item["task_id"] != task_id
                ]
                self.answered_questions.append({
                    "task_id": task_id,
                    "question": question,
                    "submitted_answer": clean_answer,
                    "timestamp": datetime.now().isoformat(),
                })

            return (
                f"āœ… **Answer:** {clean_answer}\n\n🧠 **Reasoning Memory:**\n"
                + "\n".join(self.agent.reasoning_memory[-5:])
            )
        except Exception as e:
            return f"āŒ Error: {e}"

    def submit_answers_for_scoring(self, username, agent_code_url) -> str:
        """Submit all queued answers via the agent and report the result.

        Args:
            username: Hugging Face username; must be non-blank.
            agent_code_url: URL of the agent code; must be non-blank.

        Returns:
            A Markdown status message. When the result has no ``"error"`` key,
            its ``"score"`` (default 0) is appended to ``score_history``.
        """
        if not username or not username.strip():
            return "āŒ Please provide your Hugging Face username"

        if not agent_code_url or not agent_code_url.strip():
            return "āŒ Please provide your agent code URL (Hugging Face Space)"

        if not self.answered_questions:
            return "āŒ No answered questions to submit. Please answer some questions first."

        try:
            # Only the task id and the answer are submitted; the question
            # text and timestamp stay local.
            answers = [
                {
                    "task_id": item["task_id"],
                    "submitted_answer": item["submitted_answer"],
                }
                for item in self.answered_questions
            ]

            result = self.agent.submit_answer(username, agent_code_url, answers)

            if "error" in result:
                return f"āŒ **Submission Failed:** {result.get('error', 'Unknown error')}"

            score = result.get("score", 0)
            self.score_history.append({
                "score": score,
                "questions_answered": len(answers),
                "timestamp": datetime.now().isoformat(),
            })

            return (
                "āœ… **Submission Successful!**\n\n"
                f"šŸ“Š **Score:** {score}%\n"
                f"šŸŽÆ **Questions Answered:** {len(answers)}\n\n"
                "šŸ“ˆ **Result Details:**\n"
                f"{json.dumps(result, indent=2)}"
            )
        except Exception as e:
            return f"āŒ Error submitting answers: {e}"

    def get_progress_stats(self) -> str:
        """Return a Markdown summary of the session's progress.

        Reports the number of fetched and answered questions, the latest and
        best recorded scores (0 when nothing has been submitted yet), and how
        far the latest score is from ``CERTIFICATION_TARGET``.
        """
        total_questions = len(self.current_questions)
        answered_count = len(self.answered_questions)

        if self.score_history:
            latest_score = self.score_history[-1]["score"]
            best_score = max(item["score"] for item in self.score_history)
        else:
            latest_score = 0
            best_score = 0

        stats = "šŸ“Š **Progress Statistics**\n\n"
        stats += f"šŸŽÆ **Questions Available:** {total_questions}\n"
        stats += f"āœ… **Questions Answered:** {answered_count}\n"
        stats += f"šŸ“ˆ **Latest Score:** {latest_score}%\n"
        stats += f"šŸ† **Best Score:** {best_score}%\n"
        stats += f"šŸŽ–ļø **Target:** {CERTIFICATION_TARGET}% (for certification)\n\n"

        if latest_score >= CERTIFICATION_TARGET:
            stats += "šŸŽ‰ **Congratulations! You've achieved the target score for certification!**"
        else:
            remaining = CERTIFICATION_TARGET - latest_score
            stats += f"šŸ“ˆ **{remaining}% more needed for certification**"

        return stats

    def clear_session(self) -> str:
        """Drop all queued answers and return a confirmation message.

        Fetched questions and the score history are left untouched.
        """
        self.answered_questions = []
        return "āœ… Session cleared. Ready for new questions."


# Single shared interface instance backing every UI callback below.
interface = GAIAInterface()

# Enhanced Gradio Interface
with gr.Blocks(title="šŸš€ Enhanced GAIA Agent - Full API Integration", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # šŸš€ Enhanced GAIA Agent - Complete GAIA Benchmark Implementation

    **šŸŽÆ Target: 30%+ Performance for Course Certification**

    ## 🌟 Key Features:
    - **šŸ”— Full GAIA API Integration** - Fetch real questions and submit for scoring
    - **šŸ“ File Processing** - Automatic download and analysis of task files
    - **🧠 Enhanced Multi-Step Reasoning** - Advanced tool orchestration
    - **šŸ“Š Real-time Progress Tracking** - Monitor your performance
    - **šŸ† Leaderboard Submission** - Submit scores to student leaderboard
    """)

    with gr.Tabs():
        # Tab 1: GAIA Question Processing
        with gr.TabItem("šŸŽÆ GAIA Questions"):
            gr.Markdown("### Fetch and Process Real GAIA Benchmark Questions")

            with gr.Row():
                with gr.Column(scale=1):
                    fetch_btn = gr.Button("šŸ”„ Fetch Questions from API", variant="secondary")
                    random_question_btn = gr.Button("šŸŽ² Get Random Question", variant="primary")
                    fetch_status = gr.Textbox(label="šŸ“” API Status", interactive=False)

                with gr.Column(scale=2):
                    question_info = gr.Markdown("Click 'Get Random Question' to fetch a GAIA question")

            with gr.Row():
                current_task_id = gr.Textbox(label="šŸ†” Task ID", interactive=False)
                question_input = gr.Textbox(
                    label="ā“ GAIA Question",
                    placeholder="Question will appear here when fetched from API",
                    lines=3
                )

            with gr.Row():
                process_btn = gr.Button("šŸ¤– Process with Enhanced Agent", variant="primary", size="lg")

            with gr.Row():
                answer_output = gr.Textbox(
                    label="🧠 Agent Response (with Enhanced Reasoning)",
                    lines=10,
                    interactive=False
                )

        # Tab 2: Manual Question Input
        with gr.TabItem("āœļø Manual Input"):
            gr.Markdown("### Test Agent with Custom Questions")

            manual_question = gr.Textbox(
                label="ā“ Your Question",
                placeholder="Enter any question to test the agent...",
                lines=3
            )

            manual_process_btn = gr.Button("šŸ¤– Process Question", variant="primary")

            manual_output = gr.Textbox(
                label="🧠 Agent Response",
                lines=8,
                interactive=False
            )

            # Example questions
            gr.Examples(
                examples=[
                    "What is 25 + 37?",
                    "What is the capital of Germany?",
                    "If there are 8 planets and 4 are gas giants, how many are not gas giants?",
                    "Who was the US president when the Berlin Wall fell?",
                    "List the fruits in the painting in clockwise order starting from 12 o'clock",
                    "Convert 100 degrees Celsius to Fahrenheit"
                ],
                inputs=[manual_question],
                label="šŸŽÆ Example Questions (Different Complexity Levels)"
            )

        # Tab 3: Submission & Scoring
        with gr.TabItem("šŸ“Š Submission & Scoring"):
            gr.Markdown("### Submit Answers for Official GAIA Scoring")

            with gr.Row():
                username_input = gr.Textbox(
                    label="šŸ‘¤ Hugging Face Username",
                    placeholder="Your HF username for leaderboard"
                )
                agent_code_input = gr.Textbox(
                    label="šŸ”— Agent Code URL",
                    placeholder="https://huggingface.co/spaces/your-username/your-space/tree/main"
                )

            submit_btn = gr.Button("šŸš€ Submit for Official Scoring", variant="primary", size="lg")

            submission_result = gr.Textbox(
                label="šŸ“Š Submission Results",
                lines=8,
                interactive=False
            )

            with gr.Row():
                progress_btn = gr.Button("šŸ“ˆ View Progress", variant="secondary")
                clear_btn = gr.Button("šŸ—‘ļø Clear Session", variant="secondary")

            progress_display = gr.Markdown("Click 'View Progress' to see your statistics")

        # Tab 4: Agent Capabilities
        with gr.TabItem("šŸ› ļø Agent Details"):
            gr.Markdown("""
            ### 🧠 Enhanced Agent Capabilities

            #### šŸ”§ **Tool Arsenal** (9 Enhanced Tools):
            1. **🧮 Enhanced Calculator** - Complex mathematical operations and multi-step calculations
            2. **🌐 Enhanced Web Search** - Expanded knowledge base with 20+ countries, astronomy, history
            3. **šŸ–¼ļø Image Analyzer** - Simulated visual content processing and spatial reasoning
            4. **šŸ“„ Document Reader** - File content extraction and analysis
            5. **šŸ“ File Processor** - Download and process GAIA task files (TXT, JSON, CSV)
            6. **šŸ“… Date Calculator** - Temporal reasoning and age calculations
            7. **šŸ”„ Unit Converter** - Length, temperature, and weight conversions
            8. **šŸ“ Text Analyzer** - Content analysis and pattern extraction
            9. **🧠 Reasoning Chain** - Multi-step logical synthesis

            #### šŸŽÆ **GAIA Compliance Features**:
            - **Level 1**: Basic questions (<5 steps) āœ…
            - **Level 2**: Multi-step reasoning (5-10 steps) āœ…
            - **Level 3**: Complex long-term planning āœ…
            - **File Processing**: Automatic download and analysis āœ…
            - **API Integration**: Full GAIA benchmark connectivity āœ…
            - **Clean Formatting**: Exact match answer preparation āœ…

            #### šŸ“Š **Performance Targets**:
            - **Minimum Required**: 30% accuracy for certification
            - **Current Baseline**: GPT-4 with plugins ~15%
            - **Enhanced Target**: 35-45% with optimized knowledge base
            - **Human Performance**: ~92% (reference point)

            #### 🧠 **Enhanced Knowledge Base**:
            - **Geography**: 20+ countries and capitals
            - **Astronomy**: Solar system facts, planet classifications
            - **History**: Key events with dates and figures
            - **Mathematics**: Constants and conversion factors
            - **Arts**: Famous paintings and artists
            """)

    # Event handlers
    fetch_btn.click(
        fn=interface.fetch_questions,
        outputs=[fetch_status]
    )

    random_question_btn.click(
        fn=interface.get_random_question,
        outputs=[question_info, current_task_id, question_input]
    )

    process_btn.click(
        fn=interface.process_question_with_files,
        inputs=[question_input, current_task_id],
        outputs=[answer_output]
    )

    # Manual questions carry no task id, so their answers are never queued
    # for submission.
    manual_process_btn.click(
        fn=lambda q: interface.process_question_with_files(q),
        inputs=[manual_question],
        outputs=[manual_output]
    )

    submit_btn.click(
        fn=interface.submit_answers_for_scoring,
        inputs=[username_input, agent_code_input],
        outputs=[submission_result]
    )

    progress_btn.click(
        fn=interface.get_progress_stats,
        outputs=[progress_display]
    )

    clear_btn.click(
        fn=interface.clear_session,
        outputs=[submission_result]
    )

if __name__ == "__main__":
    demo.launch(
        debug=False,
        share=True,
        server_name="0.0.0.0",
        server_port=7860
    )