sirine1712 committed on
Commit
d6f7c66
·
verified ·
1 Parent(s): a3a13ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -448
app.py CHANGED
@@ -1,473 +1,196 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
- import json
6
- import time
7
- import re
8
- from typing import Dict, List, Any, Optional
9
 
10
# Configuration
# Base URL of the scoring service (questions are fetched from and answers
# submitted to endpoints under this URL).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Model queried through the Hugging Face Inference API.
MODEL_NAME = "google/flan-t5-large"  # Free model that works well
# Space identifier, used to build the link to the agent's code.
SPACE_ID = os.environ.get("SPACE_ID", "sirine1712/Final_Assignment_Template")
# API token; None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
15
 
16
class GAIAAgent:
    """Specialized agent for GAIA benchmark questions with proper auth handling.

    A question is classified with keyword heuristics, wrapped in a
    type-specific prompt prefix, posted to the Hugging Face Inference API, and
    the generated text is reduced to a short answer string.
    """

    def __init__(self, model: str = MODEL_NAME):
        """Store the model id, derive its inference endpoint and build headers.

        Args:
            model: Hugging Face model id to query.
        """
        self.model = model
        self.api_url = f"https://api-inference.huggingface.co/models/{model}"
        self.headers = self._get_headers()

    def _get_headers(self) -> dict:
        """Return request headers, adding a Bearer token when HF_TOKEN is set."""
        if not HF_TOKEN:
            print("⚠️ WARNING: HF_TOKEN not found in environment variables")
            return {"Content-Type": "application/json"}

        return {
            "Authorization": f"Bearer {HF_TOKEN}",
            "Content-Type": "application/json"
        }

    def _test_api_access(self) -> bool:
        """Probe the inference endpoint with a small request.

        Returns:
            False on HTTP 401 or when the request raises; True otherwise
            (a 503 is reported as "model is loading" and still counts as
            reachable).
        """
        try:
            test_response = requests.post(
                self.api_url,
                headers=self.headers,
                json={"inputs": "Test connection"},
                timeout=10
            )
            if test_response.status_code == 401:
                print("❌ Authentication failed - check HF_TOKEN")
                return False
            elif test_response.status_code == 503:
                print("⏳ Model is loading...")
                return True
            else:
                print("✅ API access confirmed")
                return True
        except Exception as e:
            print(f"❌ API test failed: {e}")
            return False

    def classify_question_type(self, question: str) -> str:
        """Classify a question by keyword so a matching prompt can be chosen.

        Categories are tested in a fixed order (mathematical, factual,
        counting, temporal) and the first match wins; "general" is the
        fallback.

        NOTE(review): keywords are matched as plain substrings, so e.g. '-'
        matches any hyphen and 'sum' matches "summary". Confirm whether
        whole-word matching is wanted before tightening this.
        """
        question_lower = question.lower()

        # Mathematical/computational questions
        if any(word in question_lower for word in [
            'calculate', 'compute', 'sum', 'multiply', 'divide', 'subtract',
            'average', 'mean', 'percentage', 'ratio', 'equation', 'formula',
            'math', 'arithmetic', 'algebra', '+', '-', '*', '/', '='
        ]):
            return "mathematical"

        # Factual/knowledge questions
        elif any(word in question_lower for word in [
            'who is', 'what is', 'when was', 'where is', 'which',
            'born', 'died', 'founded', 'invented', 'discovered',
            'capital', 'president', 'author', 'wrote', 'directed'
        ]):
            return "factual"

        # Counting/quantitative questions
        elif any(word in question_lower for word in [
            'how many', 'count', 'number of', 'total', 'quantity'
        ]):
            return "counting"

        # Date/time questions
        elif any(word in question_lower for word in [
            'year', 'date', 'century', 'decade', 'month', 'day',
            'age', 'old', 'recent', 'latest', 'first time', 'last time'
        ]):
            return "temporal"

        else:
            return "general"

    def format_prompt_by_type(self, question: str, question_type: str) -> str:
        """Prefix the question with a task word chosen by its type."""
        if question_type == "mathematical":
            return f"solve: {question}"

        elif question_type == "factual":
            return f"question: {question}"

        elif question_type == "counting":
            return f"count: {question}"

        elif question_type == "temporal":
            return f"when: {question}"

        else:
            return f"answer: {question}"

    def extract_clean_answer(self, raw_response: str, question: str, question_type: str) -> str:
        """Extract and clean the answer from the model response.

        Args:
            raw_response: Text generated by the model.
            question: The original question (currently unused).
            question_type: Category from ``classify_question_type``.

        Returns:
            A number, year or date pulled out according to the question type
            when one is found; otherwise the first sentence (capped at 100
            characters), or the first 100 characters of the response.
        """
        if not raw_response or len(raw_response.strip()) == 0:
            return "Unable to generate answer"

        # Clean the response
        response = raw_response.strip()

        # For T5 models, often the response is already clean
        # Remove common artifacts
        response = re.sub(r'^(answer:|solution:|result:)\s*', '', response, flags=re.IGNORECASE)

        # Extract specific patterns based on question type
        if question_type == "mathematical":
            # Try to extract numerical answer
            numbers = re.findall(r'-?\d+\.?\d*', response)
            if numbers:
                return str(numbers[-1])  # Return the last number found

        elif question_type == "counting":
            # Extract the first number found
            numbers = re.findall(r'\d+', response)
            if numbers:
                return str(numbers[0])

        elif question_type == "temporal":
            # Look for years, dates.
            # The century alternation must be non-capturing: with a capturing
            # group, findall returns only the group ("19"/"20"), not the year.
            years = re.findall(r'\b(?:19|20)\d{2}\b', response)
            if years:
                return str(years[0])

            dates = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b', response)
            if dates:
                return str(dates[0])

        # Clean up the response length
        sentences = response.split('.')
        if len(sentences) > 0 and len(sentences[0]) > 5:
            clean_answer = sentences[0].strip()
            if len(clean_answer) > 100:
                clean_answer = clean_answer[:100] + "..."
            return clean_answer

        # Fallback: return first 100 characters
        return response[:100] + "..." if len(response) > 100 else response

    def __call__(self, question: str) -> str:
        """Answer one question.

        Args:
            question: The question text.

        Returns:
            The cleaned answer, or a human-readable error string when
            authentication fails, the request keeps failing, or processing
            raises. A string is always returned.
        """
        print(f"🔍 Processing: {question[:60]}...")

        # Check API access first
        # NOTE(review): this probe issues an extra request for every question.
        if not self._test_api_access():
            return "API authentication failed - check HF_TOKEN"

        try:
            # Classify and format the question
            question_type = self.classify_question_type(question)
            formatted_prompt = self.format_prompt_by_type(question, question_type)

            print(f"📝 Question type: {question_type}")

            # Make API call with retries
            max_retries = 3
            for attempt in range(max_retries):
                is_last_attempt = attempt == max_retries - 1
                try:
                    response = requests.post(
                        self.api_url,
                        headers=self.headers,
                        json={
                            "inputs": formatted_prompt,
                            "parameters": {
                                "max_new_tokens": 100,
                                "temperature": 0.1,  # Very low temperature for precise answers
                                "do_sample": False,  # Deterministic output
                                "return_full_text": False
                            }
                        },
                        timeout=20
                    )

                    if response.status_code == 401:
                        return "Authentication error - invalid HF_TOKEN"

                    elif response.status_code == 503:  # Model loading
                        if is_last_attempt:
                            # No retry left: skip the pointless wait.
                            break
                        wait_time = 15 + (attempt * 10)
                        print(f"⏳ Model loading, waiting {wait_time}s... (attempt {attempt + 1})")
                        time.sleep(wait_time)
                        continue

                    elif response.status_code == 429:  # Rate limit
                        if is_last_attempt:
                            break
                        wait_time = 5 + (attempt * 5)
                        print(f"⏳ Rate limited, waiting {wait_time}s...")
                        time.sleep(wait_time)
                        continue

                    response.raise_for_status()
                    result = response.json()

                    # Extract the generated text
                    if isinstance(result, list) and len(result) > 0:
                        if 'generated_text' in result[0]:
                            raw_answer = result[0]['generated_text']
                        else:
                            raw_answer = str(result[0])
                    elif isinstance(result, dict):
                        raw_answer = result.get('generated_text', str(result))
                    else:
                        raw_answer = str(result)

                    # Clean and extract the final answer
                    final_answer = self.extract_clean_answer(raw_answer, question, question_type)
                    print(f"✅ Answer: {final_answer}")
                    return final_answer

                except requests.exceptions.RequestException as e:
                    if is_last_attempt:
                        return f"Request failed after {max_retries} attempts: {str(e)}"
                    print(f"⚠️ Request failed (attempt {attempt + 1}), retrying...")
                    time.sleep(3)

            # Reached only when the final attempt was still answered with
            # 503/429; return an explicit error instead of falling through
            # and returning None.
            return f"Request failed after {max_retries} attempts: model still loading or rate limited"

        except Exception as e:
            error_msg = f"Processing error: {str(e)}"
            print(f"❌ {error_msg}")
            return error_msg
235
 
236
def check_environment():
    """Summarise whether the required configuration values are present.

    Returns:
        A newline-joined report with one line for HF_TOKEN and one for
        SPACE_ID. A missing HF_TOKEN line is marked with "❌".
    """
    token_line = (
        "✅ HF_TOKEN found"
        if HF_TOKEN
        else "❌ HF_TOKEN not found in environment variables"
    )
    space_line = f" SPACE_ID: {SPACE_ID}" if SPACE_ID else " SPACE_ID not configured"
    return "\n".join([token_line, space_line])
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on all questions and submit the results.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in.

    Returns:
        A ``(message, table)`` tuple. ``message`` is the status or result text.
        ``table`` is a DataFrame of per-question log rows, or None when the run
        stops before any question is processed.
    """
    if not profile:
        return "❌ Please log in with your Hugging Face account first.", None

    # Check environment
    env_status = check_environment()
    if "❌" in env_status:
        return f"Environment check failed:\n{env_status}", None

    username = profile.username or "anonymous"
    agent_code = f"https://huggingface.co/spaces/{SPACE_ID}/tree/main"

    print(f"🚀 Starting GAIA evaluation for user: {username}")
    print(f"🔧 Environment status:\n{env_status}")

    # Initialize the agent
    agent = GAIAAgent()

    # Fetch questions from GAIA API
    try:
        print("📥 Fetching questions from GAIA API...")
        questions_response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        questions_response.raise_for_status()
        questions = questions_response.json()
        print(f" Retrieved {len(questions)} questions")
    except Exception as e:
        error_msg = f" Failed to fetch questions: {str(e)}"
        print(error_msg)
        return error_msg, None

    # Every prefix the agent uses for an error string. The last three were
    # previously missing, so those failures were counted as successes.
    failure_prefixes = (
        "Error:",
        "Authentication error",
        "API authentication failed",
        "Request failed",
        "Processing error",
        "Unable to generate answer",
    )

    # Process each question
    answers = []
    log_entries = []
    successful_answers = 0

    for i, q in enumerate(questions, 1):
        print(f"\n{'='*60}")
        print(f"🔄 Question {i}/{len(questions)}")
        print(f"Task ID: {q.get('task_id', 'Unknown')}")
        print(f"Question: {q['question']}")

        try:
            # Get answer from agent
            answer = agent(q["question"])

            # A non-string result (e.g. None) is a failure; name it explicitly
            # rather than relying on startswith() raising.
            if not isinstance(answer, str):
                answer = f"Error: agent returned non-text answer {answer!r}"

            if answer.startswith(failure_prefixes):
                status = "❌ Failed"
            else:
                successful_answers += 1
                status = "✅ Success"

        except Exception as e:
            answer = f"Error: {str(e)}"
            status = " Exception"
            print(f"❌ Exception processing question: {e}")

        # Prepare submission format
        answers.append({
            "task_id": q["task_id"],
            "submitted_answer": str(answer)
        })

        # Log for display (long question and answer text is truncated)
        log_entries.append({
            "Task ID": q["task_id"],
            "Question": q["question"][:80] + "..." if len(q["question"]) > 80 else q["question"],
            "Answer": str(answer)[:60] + "..." if len(str(answer)) > 60 else str(answer),
            "Status": status
        })

        print(f"Answer: {answer}")
        print(f"Status: {status}")

    print(f"\n📊 Processing complete: {successful_answers}/{len(questions)} successful")

    # Submit answers to GAIA scoring API
    try:
        print(f"\n📤 Submitting {len(answers)} answers to GAIA API...")
        submission_data = {
            "username": username,
            "agent_code": agent_code,
            "answers": answers
        }

        submit_response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=submission_data,
            timeout=60
        )
        submit_response.raise_for_status()
        result = submit_response.json()

        print("✅ Submission successful!")

    except Exception as e:
        error_msg = f" Submission failed: {str(e)}"
        print(error_msg)
        return error_msg, pd.DataFrame(log_entries)

    # Format results
    score = result.get('score', 'N/A')
    correct_count = result.get('correct_count', 'N/A')
    total_attempted = result.get('total_attempted', 'N/A')
    message = result.get('message', 'No additional message')

    # Progress towards the 30% target; a non-numeric score counts as 0.
    if isinstance(score, (int, float)) and score >= 30.0:
        target_progress = "✅ TARGET ACHIEVED!"
    else:
        shortfall = 30.0 - (score if isinstance(score, (int, float)) else 0)
        target_progress = f"Need {shortfall:.1f}% more to reach 30%"

    success_message = "\n".join([
        "✅ **GAIA Evaluation Complete!**",
        "",
        "**📊 Results:**",
        f"- **Score:** {score}%",
        f"- **Correct Answers:** {correct_count}/{total_attempted}",
        f"- **Questions Processed:** {len(questions)}",
        f"- **Successful API Calls:** {successful_answers}/{len(questions)}",
        "",
        f"**🎯 Target Progress:** {target_progress}",
        "",
        f"**📝 System Message:** {message}",
        "",
        "**💡 Tips for improvement:**",
        "- Ensure HF_TOKEN has proper permissions",
        "- Try running again if API calls failed",
        "- Check question types that performed poorly",
        "",
    ])

    print(success_message)
    return success_message, pd.DataFrame(log_entries)
378
 
379
- # Create Gradio Interface
380
def create_interface():
    """Build the Gradio Blocks app for the GAIA evaluation and return it.

    The page contains an intro, a collapsed environment report, a login
    button, the run button, a results textbox and a per-question log table.
    Clicking the run button calls ``run_and_submit_all``.
    """
    status_box_css = """
        .status-box {
            background: #f8f9fa;
            border-left: 4px solid #007bff;
            padding: 15px;
        }
        """

    with gr.Blocks(
        title="🎯 GAIA Challenge Agent",
        theme=gr.themes.Soft(),
        css=status_box_css,
    ) as demo:

        # Intro and setup notes
        gr.Markdown("""
        # 🎯 GAIA Challenge Agent

        **Goal:** Achieve 30% accuracy on the GAIA benchmark

        This agent uses Google's FLAN-T5-Large model with specialized question processing to tackle GAIA's challenging questions.

        **Setup Required:**
        1. Set `HF_TOKEN` in your Space secrets (Settings → Repository secrets)
        2. Set `SPACE_ID` to your space name (e.g., "username/space-name")
        """)

        # Environment report, evaluated once while the UI is built
        with gr.Accordion("🔧 Environment Check", open=False):
            env_check = gr.Textbox(
                value=check_environment(),
                label="Environment Status",
                lines=3,
                interactive=False,
            )

        # Login
        gr.Markdown("### 🔐 Authentication")
        gr.LoginButton(value="🔑 Login with Hugging Face")

        # Trigger
        gr.Markdown("### 🚀 Run Evaluation")
        run_button = gr.Button("🎯 Start GAIA Evaluation", variant="primary", size="lg")

        # Outputs
        gr.Markdown("### 📊 Results")
        with gr.Row():
            status_output = gr.Textbox(
                label="📋 Evaluation Results",
                lines=12,
                max_lines=20,
                placeholder="Click 'Start GAIA Evaluation' to begin...",
                elem_classes=["status-box"],
            )

        gr.Markdown("### 📝 Question Processing Log")
        results_table = gr.DataFrame(
            label="Detailed Processing Results",
            headers=["Task ID", "Question", "Answer", "Status"],
            wrap=True,
            max_height=400,
        )

        # Wire the button to the evaluation run
        run_button.click(
            fn=run_and_submit_all,
            outputs=[status_output, results_table],
            show_progress=True,
        )

        # Footer
        gr.Markdown("""
        ---
        **🔍 Troubleshooting:**
        - **401 Error:** Check that HF_TOKEN is valid and set in Space secrets
        - **503 Error:** Model is loading, wait and try again
        - **0% Score:** Check answer format and question processing logic

        **📚 Model:** google/flan-t5-large (instruction-tuned for better reasoning)
        """)

    return demo
 
 
 
 
 
 
 
 
 
465
 
466
- # Launch the app
467
if __name__ == "__main__":
    # Serve on all interfaces at port 7860, without a public share link.
    launch_options = dict(server_name="0.0.0.0", server_port=7860, share=False)
    demo = create_interface()
    demo.launch(**launch_options)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import inspect
5
  import pandas as pd
 
 
 
 
6
 
7
# --- Constants ---
# Base URL of the scoring service; the questions and submit endpoints are
# built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
10
 
11
+ # --- Basic Agent Definition ---
12
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
class BasicAgent:
    """Placeholder agent that gives the same canned reply to every question."""

    def __init__(self):
        """Log that the agent has been created."""
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Log the start of the question and return the fixed reply.

        Args:
            question: The question text; only its first 50 characters are
                used, and only for logging.

        Returns:
            The constant string "This is a default answer.".
        """
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")
        reply = "This is a default answer."
        print(f"Agent returning fixed answer: {reply}")
        return reply
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the BasicAgent on them, submit the answers and
    report the result.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per processed question, or None when the run stops before any
        question is processed.
    """
    # SPACE_ID is used to build the link to this Space's code.
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (modify this part to create your agent)
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # When running as a Hugging Face Space, this link points to the codebase
    # (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError is a RequestException subclass, so it must be handled
    # first; behind the generic handler it would never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failing question is logged for display but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            "Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Shared tail for every failure path: report it and still return the log.
    print(status_message)
    return status_message, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
 
142
 
143
+ # --- Build Gradio Interface using Blocks ---
144
# Page layout: title, instructions, login button, run button, and the two
# output widgets that run_and_submit_all fills in.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # The handler's two return values map onto the status box and the table.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
173
 
 
174
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Report SPACE_HOST and SPACE_ID at startup; this is informational only.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST may already be a full host name ending in ".hf.space";
        # append the suffix only when it is missing, so the printed URL never
        # ends in ".hf.space.hf.space".
        runtime_host = space_host_startup
        if not runtime_host.endswith(".hf.space"):
            runtime_host = f"{runtime_host}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        # Repo URLs can only be printed when SPACE_ID is known.
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule, same width as the opening banner.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)