Spaces:
Runtime error
Runtime error
Omachoko
committed on
Commit
·
443c7ca
1
Parent(s):
6545c63
Restore original Gradio interface while maintaining enhanced GAIA agent features
Browse files
app.py
CHANGED
@@ -769,86 +769,70 @@ class ModularGAIAAgent:
|
|
769 |
logger.error(f"Batch processing overall error: {e}")
|
770 |
yield "Error in batch processing", []
|
771 |
|
772 |
-
# ---
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
786 |
|
|
|
787 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
788 |
-
"""
|
789 |
-
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
790 |
-
and displays the results.
|
791 |
-
"""
|
792 |
space_id = os.getenv("SPACE_ID")
|
793 |
if profile:
|
794 |
-
username =
|
795 |
print(f"User logged in: {username}")
|
796 |
else:
|
797 |
-
print("User not logged in.")
|
798 |
return "Please Login to Hugging Face with the button.", None
|
|
|
799 |
api_url = DEFAULT_API_URL
|
800 |
questions_url = f"{api_url}/questions"
|
801 |
submit_url = f"{api_url}/submit"
|
802 |
-
|
803 |
-
|
804 |
-
except Exception as e:
|
805 |
-
print(f"Error instantiating agent: {e}")
|
806 |
-
return f"Error initializing agent: {e}", None
|
807 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
808 |
-
|
809 |
-
print(f"Fetching questions from: {questions_url}")
|
810 |
try:
|
811 |
response = requests.get(questions_url, timeout=15)
|
812 |
response.raise_for_status()
|
813 |
questions_data = response.json()
|
814 |
-
if not questions_data:
|
815 |
-
print("Fetched questions list is empty.")
|
816 |
-
return "Fetched questions list is empty or invalid format.", None
|
817 |
-
print(f"Fetched {len(questions_data)} questions.")
|
818 |
-
except requests.exceptions.RequestException as e:
|
819 |
-
print(f"Error fetching questions: {e}")
|
820 |
-
return f"Error fetching questions: {e}", None
|
821 |
-
except requests.exceptions.JSONDecodeError as e:
|
822 |
-
print(f"Error decoding JSON response from questions endpoint: {e}")
|
823 |
-
print(f"Response text: {response.text[:500]}")
|
824 |
-
return f"Error decoding server response for questions: {e}", None
|
825 |
except Exception as e:
|
826 |
-
|
827 |
-
|
828 |
results_log = []
|
829 |
answers_payload = []
|
830 |
-
|
|
|
831 |
for item in questions_data:
|
832 |
task_id = item.get("task_id")
|
833 |
question_text = item.get("question")
|
834 |
-
|
835 |
-
if not task_id or question_text is None:
|
836 |
-
print(f"Skipping item with missing task_id or question: {item}")
|
837 |
continue
|
838 |
-
|
839 |
-
|
840 |
-
|
841 |
-
|
842 |
-
|
843 |
-
print(f"Error running agent on task {task_id}: {e}")
|
844 |
-
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
845 |
if not answers_payload:
|
846 |
-
print("Agent did not produce any answers to submit.")
|
847 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
|
|
848 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
849 |
-
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
850 |
-
print(status_update)
|
851 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
|
|
852 |
try:
|
853 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
854 |
response.raise_for_status()
|
@@ -858,130 +842,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
858 |
f"User: {result_data.get('username')}\n"
|
859 |
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
860 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
861 |
-
f"Message: {result_data.get('message', 'No message received.')}"
|
862 |
-
|
863 |
results_df = pd.DataFrame(results_log)
|
864 |
return final_status, results_df
|
865 |
-
except requests.exceptions.HTTPError as e:
|
866 |
-
error_detail = f"Server responded with status {e.response.status_code}."
|
867 |
-
try:
|
868 |
-
error_json = e.response.json()
|
869 |
-
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
870 |
-
except requests.exceptions.JSONDecodeError:
|
871 |
-
error_detail += f" Response: {e.response.text[:500]}"
|
872 |
-
status_message = f"Submission Failed: {error_detail}"
|
873 |
-
print(status_message)
|
874 |
-
results_df = pd.DataFrame(results_log)
|
875 |
-
return status_message, results_df
|
876 |
-
except requests.exceptions.Timeout:
|
877 |
-
status_message = "Submission Failed: The request timed out."
|
878 |
-
print(status_message)
|
879 |
-
results_df = pd.DataFrame(results_log)
|
880 |
-
return status_message, results_df
|
881 |
-
except requests.exceptions.RequestException as e:
|
882 |
-
status_message = f"Submission Failed: Network error - {e}"
|
883 |
-
print(status_message)
|
884 |
-
results_df = pd.DataFrame(results_log)
|
885 |
-
return status_message, results_df
|
886 |
except Exception as e:
|
887 |
-
|
888 |
-
print(status_message)
|
889 |
-
results_df = pd.DataFrame(results_log)
|
890 |
-
return status_message, results_df
|
891 |
-
|
892 |
-
# --- Gradio UI with Enhanced Feedback and Control ---
|
893 |
-
with gr.Blocks(title="GAIA Agent - Multi-Tab with Progress Tracking") as app:
|
894 |
-
gr.Markdown("# GAIA Agent for Hugging Face AI Agents Course\nTarget: 30%+ on GAIA Benchmark for Certification")
|
895 |
-
with gr.Tabs() as tabs:
|
896 |
-
# Tab 1: Fetch GAIA Questions with Progress
|
897 |
-
with gr.TabItem("Fetch GAIA Questions"):
|
898 |
-
with gr.Row():
|
899 |
-
token_input = gr.Textbox(label="Hugging Face Token", placeholder="Enter your HF token", type="password")
|
900 |
-
fetch_btn = gr.Button("Fetch Questions")
|
901 |
-
fetch_progress = gr.Textbox(label="Progress", value="Not started", interactive=False)
|
902 |
-
questions_output = gr.JSON(label="Fetched Questions")
|
903 |
-
fetch_btn.click(
|
904 |
-
fn=lambda token: ("Fetching...", agent.fetch_questions(token)),
|
905 |
-
inputs=token_input,
|
906 |
-
outputs=[fetch_progress, questions_output]
|
907 |
-
)
|
908 |
-
# Tab 2: Manual Question Input with Detailed Feedback
|
909 |
-
with gr.TabItem("Manual Question Input"):
|
910 |
-
question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here")
|
911 |
-
with gr.Row():
|
912 |
-
file_upload = gr.File(label="Upload File (optional)", file_types=[".jpg", ".png", ".mp3", ".csv", ".xlsx", ".py"])
|
913 |
-
context_upload = gr.File(label="Context Files (optional)", file_count="multiple")
|
914 |
-
answer_btn = gr.Button("Get Answer")
|
915 |
-
with gr.Row():
|
916 |
-
answer_output = gr.Textbox(label="Answer", interactive=False)
|
917 |
-
reasoning_trace = gr.Textbox(label="Reasoning Trace", interactive=False)
|
918 |
-
answer_btn.click(
|
919 |
-
fn=lambda q, f, ctx: agent.answer_question_manual(q, f, ctx),
|
920 |
-
inputs=[question_input, file_upload, context_upload],
|
921 |
-
outputs=[answer_output, reasoning_trace]
|
922 |
-
)
|
923 |
-
# Tab 3: Submit Answers and View Score with Progress Bar
|
924 |
-
with gr.TabItem("Submit & Score"):
|
925 |
-
with gr.Row():
|
926 |
-
submit_token = gr.Textbox(label="Hugging Face Token", placeholder="Enter your HF token", type="password")
|
927 |
-
submit_btn = gr.Button("Run on All & Submit")
|
928 |
-
submit_progress = gr.Textbox(label="Submission Progress", value="Not started", interactive=False)
|
929 |
-
score_output = gr.Textbox(label="Score", interactive=False)
|
930 |
-
with gr.Row():
|
931 |
-
progress_bar = gr.Slider(minimum=0, maximum=100, value=0, label="Completion", interactive=False)
|
932 |
-
status_text = gr.Textbox(label="Status", value="Idle", interactive=False)
|
933 |
-
submit_btn.click(
|
934 |
-
fn=lambda token: agent.run_and_submit_all(token),
|
935 |
-
inputs=submit_token,
|
936 |
-
outputs=[submit_progress, score_output, progress_bar, status_text]
|
937 |
-
)
|
938 |
-
# Tab 4: Agent Details and Configuration
|
939 |
-
with gr.TabItem("Agent Details"):
|
940 |
-
gr.Markdown("## Agent Capabilities\n- **Tools**: Web search, image/audio analysis, table QA, YouTube QA, chess analysis, botanical classification\n- **Reasoning**: Thought-Action-Observation cycle with ReAct prompting (up to 5 steps)\n- **API**: Full GAIA API integration for fetching and submitting\n- **Performance**: Optimized with caching and error recovery")
|
941 |
-
with gr.Row():
|
942 |
-
tool_list = gr.Textbox(label="Available Tools", value=", ".join(TOOL_REGISTRY.keys()), interactive=False)
|
943 |
-
config_btn = gr.Button("Refresh Configuration")
|
944 |
-
config_output = gr.Textbox(label="Configuration Status", interactive=False)
|
945 |
-
config_btn.click(
|
946 |
-
fn=lambda: ("Configuration refreshed", ", ".join(TOOL_REGISTRY.keys())),
|
947 |
-
inputs=None,
|
948 |
-
outputs=[config_output, tool_list]
|
949 |
-
)
|
950 |
-
# Tab 5: Batch Processing with Progress Tracking
|
951 |
-
with gr.TabItem("Batch Processing"):
|
952 |
-
batch_token = gr.Textbox(label="Hugging Face Token", placeholder="Enter your HF token", type="password")
|
953 |
-
batch_btn = gr.Button("Process Batch of Questions")
|
954 |
-
batch_progress = gr.Textbox(label="Batch Progress", value="0/0 questions processed", interactive=False)
|
955 |
-
batch_results = gr.JSON(label="Batch Results")
|
956 |
-
batch_btn.click(
|
957 |
-
fn=lambda token: agent.process_batch(token),
|
958 |
-
inputs=batch_token,
|
959 |
-
outputs=[batch_progress, batch_results]
|
960 |
-
)
|
961 |
-
|
962 |
-
# Launch app with public link for easy access
|
963 |
-
app.launch(share=True)
|
964 |
-
|
965 |
-
if __name__ == "__main__":
|
966 |
-
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
967 |
-
# Check for SPACE_HOST and SPACE_ID at startup for information
|
968 |
-
space_host_startup = os.getenv("SPACE_HOST")
|
969 |
-
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
970 |
-
|
971 |
-
if space_host_startup:
|
972 |
-
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
973 |
-
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
974 |
-
else:
|
975 |
-
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
976 |
-
|
977 |
-
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
978 |
-
print(f"✅ SPACE_ID found: {space_id_startup}")
|
979 |
-
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
980 |
-
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
981 |
-
else:
|
982 |
-
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
983 |
-
|
984 |
-
print("-"*(60 + len(" App Starting ")) + "\n")
|
985 |
-
|
986 |
-
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
987 |
-
app.launch(debug=True, share=False)
|
|
|
769 |
logger.error(f"Batch processing overall error: {e}")
|
770 |
yield "Error in batch processing", []
|
771 |
|
772 |
+
# --- Build Gradio Interface using Blocks (Maintaining Original Architecture) ---
|
773 |
+
with gr.Blocks() as demo:
|
774 |
+
gr.Markdown("# Smart Agent Evaluation Runner")
|
775 |
+
gr.Markdown("""
|
776 |
+
**Instructions:**
|
777 |
+
1. Clone this space, define your agent logic, tools, packages, etc.
|
778 |
+
2. Log in to Hugging Face.
|
779 |
+
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
780 |
+
""")
|
781 |
+
|
782 |
+
gr.LoginButton()
|
783 |
+
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
784 |
+
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
785 |
+
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
786 |
+
|
787 |
+
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
788 |
+
|
789 |
+
if __name__ == "__main__":
|
790 |
+
print("Launching Gradio Interface for Smart Agent Evaluation...")
|
791 |
+
demo.launch(debug=True, share=False)
|
792 |
|
793 |
+
# Update run_and_submit_all to use the enhanced ModularGAIAAgent
|
794 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
|
|
|
|
|
|
795 |
space_id = os.getenv("SPACE_ID")
|
796 |
if profile:
|
797 |
+
username = profile.username
|
798 |
print(f"User logged in: {username}")
|
799 |
else:
|
|
|
800 |
return "Please Login to Hugging Face with the button.", None
|
801 |
+
|
802 |
api_url = DEFAULT_API_URL
|
803 |
questions_url = f"{api_url}/questions"
|
804 |
submit_url = f"{api_url}/submit"
|
805 |
+
|
806 |
+
agent = ModularGAIAAgent(api_url=DEFAULT_API_URL)
|
|
|
|
|
|
|
807 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
808 |
+
|
|
|
809 |
try:
|
810 |
response = requests.get(questions_url, timeout=15)
|
811 |
response.raise_for_status()
|
812 |
questions_data = response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
813 |
except Exception as e:
|
814 |
+
return f"Error fetching questions: {e}", None
|
815 |
+
|
816 |
results_log = []
|
817 |
answers_payload = []
|
818 |
+
correct_answers = 0
|
819 |
+
|
820 |
for item in questions_data:
|
821 |
task_id = item.get("task_id")
|
822 |
question_text = item.get("question")
|
823 |
+
if not task_id or not question_text:
|
|
|
|
|
824 |
continue
|
825 |
+
|
826 |
+
submitted_answer, trace = agent.answer_question(item)
|
827 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
828 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer, "Reasoning Trace": "\n".join(trace)})
|
829 |
+
|
|
|
|
|
830 |
if not answers_payload:
|
|
|
831 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
832 |
+
|
833 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
|
|
|
|
834 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
835 |
+
|
836 |
try:
|
837 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
838 |
response.raise_for_status()
|
|
|
842 |
f"User: {result_data.get('username')}\n"
|
843 |
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
844 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
845 |
+
f"Message: {result_data.get('message', 'No message received.')}"
|
846 |
+
)
|
847 |
results_df = pd.DataFrame(results_log)
|
848 |
return final_status, results_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
849 |
except Exception as e:
|
850 |
+
return f"Submission Failed: {e}", pd.DataFrame(results_log)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|