File size: 14,008 Bytes
b56f671
 
 
 
 
 
15bb146
 
b56f671
 
 
15bb146
b56f671
 
d0c134a
b56f671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0c134a
15bb146
b56f671
 
 
15bb146
b56f671
 
 
 
 
 
 
 
15bb146
b56f671
15bb146
b56f671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0c134a
b56f671
 
d0c134a
b56f671
 
 
 
 
 
 
 
 
 
 
 
 
 
15bb146
b56f671
 
 
 
15bb146
b56f671
 
 
 
 
 
15bb146
b56f671
 
 
 
 
 
15bb146
b56f671
 
 
 
 
 
 
 
 
 
 
 
15bb146
b56f671
 
 
 
 
d0c134a
b56f671
 
 
 
 
 
 
 
 
 
d0c134a
 
b56f671
 
 
 
 
 
 
 
 
 
 
 
 
d0c134a
15bb146
b56f671
 
 
 
 
 
 
 
 
d0c134a
 
b56f671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0c134a
 
b56f671
 
 
 
 
 
 
 
15bb146
d0c134a
b56f671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15bb146
 
 
b56f671
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
#!/usr/bin/env python3
"""
🚀 Enhanced GAIA Agent Interface - Full API Integration
Complete Gradio interface for GAIA benchmark with API connectivity and scoring
"""

import os
import gradio as gr
import json
from datetime import datetime
from gaia_agent import GAIAAgent

class GAIAInterface:
    """🎯 Enhanced GAIA Interface with Full API Integration.

    Stateful glue between the Gradio callbacks and ``GAIAAgent``. The methods
    that call the agent catch exceptions and return a display-ready message
    instead of raising, so a failing agent/API call is shown in the UI rather
    than propagating out of a callback.

    Attributes:
        agent: The ``GAIAAgent`` instance every request is delegated to.
        current_questions: Questions returned by the last successful fetch.
        answered_questions: One record per answered task (at most one per
            ``task_id``), used to build the scoring submission.
        score_history: One record per successful submission.
    """

    # Score (percent) shown in the progress text as the certification target.
    TARGET_SCORE = 30

    def __init__(self):
        self.agent = GAIAAgent()
        self.current_questions = []
        self.answered_questions = []
        self.score_history = []

    def fetch_questions(self):
        """Fetch the question list through the agent and cache it.

        Returns:
            A status message. On success the fetched list replaces
            ``self.current_questions``; a falsy result or an exception is
            reported as a failure message and the cache is left untouched.
        """
        try:
            questions = self.agent.get_questions()
            if not questions:
                return "❌ Failed to fetch questions from GAIA API"
            self.current_questions = questions
            return f"✅ Fetched {len(questions)} questions from GAIA API"
        except Exception as e:  # UI boundary: report instead of raising
            return f"❌ Error fetching questions: {e}"

    def get_random_question(self):
        """Fetch one random question through the agent.

        Returns:
            A ``(info_markdown, task_id, question)`` tuple. On failure the
            first element carries the error message and the other two are
            empty strings.
        """
        try:
            question_data = self.agent.get_random_question()
            if not question_data:
                return "❌ Failed to fetch random question", "", ""

            task_id = question_data.get('task_id', 'unknown')
            question = question_data.get('Question', 'No question found')
            level = question_data.get('Level', 'Unknown')
            files = question_data.get('file_name')

            lines = [
                f"📋 **Task ID:** {task_id}",
                f"🎯 **Level:** {level}",
            ]
            if files:
                lines.append(f"📁 **Associated Files:** {files}")
            lines.append(f"❓ **Question:** {question}")

            # The text is rendered as Markdown, where a single newline does
            # not break the line; blank lines keep each field on its own row.
            return "\n\n".join(lines), task_id, question
        except Exception as e:  # UI boundary: report instead of raising
            return f"❌ Error: {e}", "", ""

    def process_question_with_files(self, question, task_id=None):
        """Answer a question with the agent and remember the answer.

        Args:
            question: The question text. ``None``, empty or whitespace-only
                input is rejected with a prompt message.
            task_id: Optional task identifier. When present it is passed to
                the agent and the cleaned answer is recorded for submission;
                an empty or whitespace-only value counts as "no task".

        Returns:
            The cleaned answer followed by the last five entries of the
            agent's reasoning memory, or an error message.
        """
        if not question or not question.strip():
            return "Please enter a question or fetch one from GAIA API."

        # An empty Task ID textbox arrives as "" - normalise it to None so the
        # agent is never asked to work with a blank task id.
        if isinstance(task_id, str):
            task_id = task_id.strip()
        task_id = task_id or None

        try:
            answer = self.agent.query(question, task_id=task_id, max_steps=15)
            clean_answer = self.agent.clean_for_api_submission(answer)

            if task_id:
                self._record_answer(task_id, question, clean_answer)

            recent_steps = "\n".join(
                str(step) for step in self.agent.reasoning_memory[-5:]
            )
            return (
                f"✅ **Answer:** {clean_answer}\n\n"
                f"🧠 **Reasoning Memory:**\n{recent_steps}"
            )
        except Exception as e:  # UI boundary: report instead of raising
            return f"❌ Error: {e}"

    def _record_answer(self, task_id, question, clean_answer):
        """Store the answer for ``task_id``, replacing any earlier answer.

        Keeping one entry per task prevents a re-processed question from being
        submitted twice under the same ``task_id``.
        """
        entry = {
            "task_id": task_id,
            "question": question,
            "submitted_answer": clean_answer,
            "timestamp": datetime.now().isoformat(),
        }
        for index, item in enumerate(self.answered_questions):
            if item["task_id"] == task_id:
                self.answered_questions[index] = entry
                return
        self.answered_questions.append(entry)

    def submit_answers_for_scoring(self, username, agent_code_url):
        """Submit every recorded answer through the agent for scoring.

        Args:
            username: Hugging Face username; must be non-blank.
            agent_code_url: URL of the agent code; must be non-blank.

        Returns:
            A result message. A successful submission also appends a record
            (score, number of answers, timestamp) to ``self.score_history``.
        """
        username = (username or "").strip()
        agent_code_url = (agent_code_url or "").strip()

        if not username:
            return "❌ Please provide your Hugging Face username"

        if not agent_code_url:
            return "❌ Please provide your agent code URL (Hugging Face Space)"

        if not self.answered_questions:
            return "❌ No answered questions to submit. Please answer some questions first."

        try:
            # Only the fields used for submission are sent; question text and
            # timestamps stay local.
            answers = [
                {
                    "task_id": item["task_id"],
                    "submitted_answer": item["submitted_answer"],
                }
                for item in self.answered_questions
            ]

            result = self.agent.submit_answer(username, agent_code_url, answers)

            if not isinstance(result, dict):
                return f"❌ **Submission Failed:** unexpected response: {result!r}"
            if "error" in result:
                return f"❌ **Submission Failed:** {result.get('error', 'Unknown error')}"

            score = result.get("score", 0)
            self.score_history.append({
                "score": score,
                "questions_answered": len(answers),
                "timestamp": datetime.now().isoformat(),
            })

            return (
                "✅ **Submission Successful!**\n\n"
                f"📊 **Score:** {score}%\n"
                f"🎯 **Questions Answered:** {len(answers)}\n\n"
                f"📈 **Result Details:**\n{json.dumps(result, indent=2)}"
            )
        except Exception as e:  # UI boundary: report instead of raising
            return f"❌ Error submitting answers: {e}"

    def get_progress_stats(self):
        """Build a Markdown summary of the current session.

        Returns:
            Counts of available and answered questions, the latest and best
            submitted scores (``0`` before any submission) and how far the
            latest score is from ``TARGET_SCORE``.
        """
        total_questions = len(self.current_questions)
        answered_count = len(self.answered_questions)

        if self.score_history:
            latest_score = self.score_history[-1]["score"]
            best_score = max(item["score"] for item in self.score_history)
        else:
            latest_score = 0
            best_score = 0

        target = self.TARGET_SCORE
        lines = [
            "📊 **Progress Statistics**",
            f"🎯 **Questions Available:** {total_questions}",
            f"✅ **Questions Answered:** {answered_count}",
            f"📈 **Latest Score:** {latest_score}%",
            f"🏆 **Best Score:** {best_score}%",
            f"🎖️ **Target:** {target}% (for certification)",
        ]

        if latest_score >= target:
            lines.append("🎉 **Congratulations! You've achieved the target score for certification!**")
        else:
            # ":g" avoids float noise such as 4.399999999999999.
            remaining = target - latest_score
            lines.append(f"📈 **{remaining:g}% more needed for certification**")

        # Rendered as Markdown: blank lines keep every statistic on its own row.
        return "\n\n".join(lines)

    def clear_session(self):
        """Forget the recorded answers.

        Only ``answered_questions`` is reset; the fetched questions and the
        score history are kept.
        """
        self.answered_questions = []
        return "✅ Session cleared. Ready for new questions."

# Initialize interface: one shared GAIAInterface instance backs every callback below.
interface = GAIAInterface()

# Enhanced Gradio Interface
# Layout: a header followed by four tabs. Components are created inside the
# `with` block of the tab/row/column they belong to, so the nesting below is
# the page structure. Event handlers are wired at the end, still inside the
# Blocks context.
with gr.Blocks(title="🚀 Enhanced GAIA Agent - Full API Integration", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🚀 Enhanced GAIA Agent - Complete GAIA Benchmark Implementation
    
    **🎯 Target: 30%+ Performance for Course Certification**
    
    ## 🌟 Key Features:
    - **🔗 Full GAIA API Integration** - Fetch real questions and submit for scoring
    - **📁 File Processing** - Automatic download and analysis of task files
    - **🧠 Enhanced Multi-Step Reasoning** - Advanced tool orchestration
    - **📊 Real-time Progress Tracking** - Monitor your performance
    - **🏆 Leaderboard Submission** - Submit scores to student leaderboard
    """)

    with gr.Tabs():
        # Tab 1: GAIA Question Processing
        with gr.TabItem("🎯 GAIA Questions"):
            gr.Markdown("### Fetch and Process Real GAIA Benchmark Questions")

            with gr.Row():
                with gr.Column(scale=1):
                    fetch_btn = gr.Button("🔄 Fetch Questions from API", variant="secondary")
                    random_question_btn = gr.Button("🎲 Get Random Question", variant="primary")
                    fetch_status = gr.Textbox(label="📡 API Status", interactive=False)

                with gr.Column(scale=2):
                    question_info = gr.Markdown("Click 'Get Random Question' to fetch a GAIA question")

            with gr.Row():
                # Filled by "Get Random Question"; read-only so the id always
                # matches the fetched question.
                current_task_id = gr.Textbox(label="🆔 Task ID", interactive=False)
                question_input = gr.Textbox(
                    label="❓ GAIA Question",
                    placeholder="Question will appear here when fetched from API",
                    lines=3
                )

            with gr.Row():
                process_btn = gr.Button("🤖 Process with Enhanced Agent", variant="primary", size="lg")

            with gr.Row():
                answer_output = gr.Textbox(
                    label="🧠 Agent Response (with Enhanced Reasoning)",
                    lines=10,
                    interactive=False
                )

        # Tab 2: Manual Question Input
        with gr.TabItem("✏️ Manual Input"):
            gr.Markdown("### Test Agent with Custom Questions")

            manual_question = gr.Textbox(
                label="❓ Your Question",
                placeholder="Enter any question to test the agent...",
                lines=3
            )

            manual_process_btn = gr.Button("🤖 Process Question", variant="primary")
            manual_output = gr.Textbox(
                label="🧠 Agent Response",
                lines=8,
                interactive=False
            )

            # Example questions
            gr.Examples(
                examples=[
                    "What is 25 + 37?",
                    "What is the capital of Germany?",
                    "If there are 8 planets and 4 are gas giants, how many are not gas giants?",
                    "Who was the US president when the Berlin Wall fell?",
                    "List the fruits in the painting in clockwise order starting from 12 o'clock",
                    "Convert 100 degrees Celsius to Fahrenheit"
                ],
                inputs=[manual_question],
                label="🎯 Example Questions (Different Complexity Levels)"
            )

        # Tab 3: Submission & Scoring
        with gr.TabItem("📊 Submission & Scoring"):
            gr.Markdown("### Submit Answers for Official GAIA Scoring")

            with gr.Row():
                username_input = gr.Textbox(
                    label="👤 Hugging Face Username",
                    placeholder="Your HF username for leaderboard"
                )
                agent_code_input = gr.Textbox(
                    label="🔗 Agent Code URL",
                    placeholder="https://huggingface.co/spaces/your-username/your-space/tree/main"
                )

            submit_btn = gr.Button("🚀 Submit for Official Scoring", variant="primary", size="lg")
            submission_result = gr.Textbox(
                label="📊 Submission Results",
                lines=8,
                interactive=False
            )

            with gr.Row():
                progress_btn = gr.Button("📈 View Progress", variant="secondary")
                clear_btn = gr.Button("🗑️ Clear Session", variant="secondary")

            progress_display = gr.Markdown("Click 'View Progress' to see your statistics")

        # Tab 4: Agent Capabilities
        with gr.TabItem("🛠️ Agent Details"):
            gr.Markdown("""
            ### 🧠 Enhanced Agent Capabilities
            
            #### 🔧 **Tool Arsenal** (9 Enhanced Tools):
            1. **🧮 Enhanced Calculator** - Complex mathematical operations and multi-step calculations
            2. **🌐 Enhanced Web Search** - Expanded knowledge base with 20+ countries, astronomy, history
            3. **🖼️ Image Analyzer** - Simulated visual content processing and spatial reasoning
            4. **📄 Document Reader** - File content extraction and analysis
            5. **📁 File Processor** - Download and process GAIA task files (TXT, JSON, CSV)
            6. **📅 Date Calculator** - Temporal reasoning and age calculations
            7. **🔄 Unit Converter** - Length, temperature, and weight conversions
            8. **📝 Text Analyzer** - Content analysis and pattern extraction
            9. **🧠 Reasoning Chain** - Multi-step logical synthesis
            
            #### 🎯 **GAIA Compliance Features**:
            - **Level 1**: Basic questions (<5 steps) ✅
            - **Level 2**: Multi-step reasoning (5-10 steps) ✅
            - **Level 3**: Complex long-term planning ✅
            - **File Processing**: Automatic download and analysis ✅
            - **API Integration**: Full GAIA benchmark connectivity ✅
            - **Clean Formatting**: Exact match answer preparation ✅
            
            #### 📊 **Performance Targets**:
            - **Minimum Required**: 30% accuracy for certification
            - **Current Baseline**: GPT-4 with plugins ~15%
            - **Enhanced Target**: 35-45% with optimized knowledge base
            - **Human Performance**: ~92% (reference point)
            
            #### 🧠 **Enhanced Knowledge Base**:
            - **Geography**: 20+ countries and capitals
            - **Astronomy**: Solar system facts, planet classifications
            - **History**: Key events with dates and figures
            - **Mathematics**: Constants and conversion factors
            - **Arts**: Famous paintings and artists
            """)

    # Event handlers
    fetch_btn.click(
        fn=interface.fetch_questions,
        outputs=[fetch_status]
    )

    random_question_btn.click(
        fn=interface.get_random_question,
        outputs=[question_info, current_task_id, question_input]
    )

    # GAIA tab: the task id is forwarded so the answer is recorded for submission.
    process_btn.click(
        fn=interface.process_question_with_files,
        inputs=[question_input, current_task_id],
        outputs=[answer_output]
    )

    # Manual tab: no task id is passed, so the method's default (None) applies
    # and the answer is not recorded for submission.
    manual_process_btn.click(
        fn=interface.process_question_with_files,
        inputs=[manual_question],
        outputs=[manual_output]
    )

    submit_btn.click(
        fn=interface.submit_answers_for_scoring,
        inputs=[username_input, agent_code_input],
        outputs=[submission_result]
    )

    progress_btn.click(
        fn=interface.get_progress_stats,
        outputs=[progress_display]
    )

    # The confirmation message reuses the submission results box.
    clear_btn.click(
        fn=interface.clear_session,
        outputs=[submission_result]
    )

if __name__ == "__main__":
    # Launch settings, collected in one mapping: debug off, share link
    # requested, listening on every interface ("0.0.0.0") at port 7860.
    launch_options = {
        "debug": False,
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)