Final_Assignment_Template

Sleeping

App Files Files Community

sirine1712 committed on Jun 18

Commit

d6f7c66

verified ·

1 Parent(s): a3a13ef

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -448

app.py CHANGED Viewed

@@ -1,473 +1,196 @@
 import os
 import gradio as gr
 import requests
 import pandas as pd
-import json
-import time
-import re
-from typing import Dict, List, Any, Optional
-# Config
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MODEL_NAME = "google/flan-t5-large"  # Free model that works well
-SPACE_ID = os.getenv("SPACE_ID", "sirine1712/Final_Assignment_Template")
-HF_TOKEN = os.getenv("HF_TOKEN")
-class GAIAAgent:
-    """Specialized agent for GAIA benchmark questions with proper auth handling"""
-    def __init__(self, model: str = MODEL_NAME):
-        self.model = model
-        self.api_url = f"https://api-inference.huggingface.co/models/{model}"
-        self.headers = self._get_headers()
-    def _get_headers(self) -> dict:
-        """Get proper headers with authentication"""
-        if not HF_TOKEN:
-            print("⚠️ WARNING: HF_TOKEN not found in environment variables")
-            return {"Content-Type": "application/json"}
-        return {
-            "Authorization": f"Bearer {HF_TOKEN}",
-            "Content-Type": "application/json"
-        }
-    def _test_api_access(self) -> bool:
-        """Test if we can access the HF API"""
-        try:
-            test_response = requests.post(
-                self.api_url,
-                headers=self.headers,
-                json={"inputs": "Test connection"},
-                timeout=10
-            )
-            if test_response.status_code == 401:
-                print("❌ Authentication failed - check HF_TOKEN")
-                return False
-            elif test_response.status_code == 503:
-                print("⏳ Model is loading...")
-                return True
-            else:
-                print("✅ API access confirmed")
-                return True
-        except Exception as e:
-            print(f"❌ API test failed: {e}")
-            return False
-    def classify_question_type(self, question: str) -> str:
-        """Classify question type for better processing"""
-        question_lower = question.lower()
-        # Mathematical/computational questions
-        if any(word in question_lower for word in [
-            'calculate', 'compute', 'sum', 'multiply', 'divide', 'subtract',
-            'average', 'mean', 'percentage', 'ratio', 'equation', 'formula',
-            'math', 'arithmetic', 'algebra', '+', '-', '*', '/', '='
-        ]):
-            return "mathematical"
-        # Factual/knowledge questions
-        elif any(word in question_lower for word in [
-            'who is', 'what is', 'when was', 'where is', 'which',
-            'born', 'died', 'founded', 'invented', 'discovered',
-            'capital', 'president', 'author', 'wrote', 'directed'
-        ]):
-            return "factual"
-        # Counting/quantitative questions
-        elif any(word in question_lower for word in [
-            'how many', 'count', 'number of', 'total', 'quantity'
-        ]):
-            return "counting"
-        # Date/time questions
-        elif any(word in question_lower for word in [
-            'year', 'date', 'century', 'decade', 'month', 'day',
-            'age', 'old', 'recent', 'latest', 'first time', 'last time'
-        ]):
-            return "temporal"
-        else:
-            return "general"
-    def format_prompt_by_type(self, question: str, question_type: str) -> str:
-        """Format prompt based on question type for T5 model"""
-        if question_type == "mathematical":
-            return f"solve: {question}"
-        elif question_type == "factual":
-            return f"question: {question}"
-        elif question_type == "counting":
-            return f"count: {question}"
-        elif question_type == "temporal":
-            return f"when: {question}"
-        else:
-            return f"answer: {question}"
-    def extract_clean_answer(self, raw_response: str, question: str, question_type: str) -> str:
-        """Extract and clean the answer from model response"""
-        if not raw_response or len(raw_response.strip()) == 0:
-            return "Unable to generate answer"
-        # Clean the response
-        response = raw_response.strip()
-        # For T5 models, often the response is already clean
-        # Remove common artifacts
-        response = re.sub(r'^(answer:|solution:|result:)\s*', '', response, flags=re.IGNORECASE)
-        # Extract specific patterns based on question type
-        if question_type == "mathematical":
-            # Try to extract numerical answer
-            numbers = re.findall(r'-?\d+\.?\d*', response)
-            if numbers:
-                return str(numbers[-1])  # Return the last number found
-        elif question_type == "counting":
-            # Extract the first number found
-            numbers = re.findall(r'\d+', response)
-            if numbers:
-                return str(numbers[0])
-        elif question_type == "temporal":
-            # Look for years, dates
-            years = re.findall(r'\b(19|20)\d{2}\b', response)
-            if years:
-                return str(years[0])
-            dates = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b', response)
-            if dates:
-                return str(dates[0])
-        # Clean up the response length
-        sentences = response.split('.')
-        if len(sentences) > 0 and len(sentences[0]) > 5:
-            clean_answer = sentences[0].strip()
-            if len(clean_answer) > 100:
-                clean_answer = clean_answer[:100] + "..."
-            return clean_answer
-        # Fallback: return first 100 characters
-        return response[:100] + "..." if len(response) > 100 else response
     def __call__(self, question: str) -> str:
-        """Main method to process questions"""
-        print(f"🔍 Processing: {question[:60]}...")
-        # Check API access first
-        if not self._test_api_access():
-            return "API authentication failed - check HF_TOKEN"
-        try:
-            # Classify and format the question
-            question_type = self.classify_question_type(question)
-            formatted_prompt = self.format_prompt_by_type(question, question_type)
-            print(f"📝 Question type: {question_type}")
-            # Make API call with retries
-            max_retries = 3
-            for attempt in range(max_retries):
-                try:
-                    response = requests.post(
-                        self.api_url,
-                        headers=self.headers,
-                        json={
-                            "inputs": formatted_prompt,
-                            "parameters": {
-                                "max_new_tokens": 100,
-                                "temperature": 0.1,  # Very low temperature for precise answers
-                                "do_sample": False,  # Deterministic output
-                                "return_full_text": False
-                            }
-                        },
-                        timeout=20
-                    )
-                    if response.status_code == 401:
-                        return "Authentication error - invalid HF_TOKEN"
-                    elif response.status_code == 503:  # Model loading
-                        wait_time = 15 + (attempt * 10)
-                        print(f"⏳ Model loading, waiting {wait_time}s... (attempt {attempt + 1})")
-                        time.sleep(wait_time)
-                        continue
-                    elif response.status_code == 429:  # Rate limit
-                        wait_time = 5 + (attempt * 5)
-                        print(f"⏳ Rate limited, waiting {wait_time}s...")
-                        time.sleep(wait_time)
-                        continue
-                    response.raise_for_status()
-                    result = response.json()
-                    # Extract the generated text
-                    if isinstance(result, list) and len(result) > 0:
-                        if 'generated_text' in result[0]:
-                            raw_answer = result[0]['generated_text']
-                        else:
-                            raw_answer = str(result[0])
-                    elif isinstance(result, dict):
-                        raw_answer = result.get('generated_text', str(result))
-                    else:
-                        raw_answer = str(result)
-                    # Clean and extract the final answer
-                    final_answer = self.extract_clean_answer(raw_answer, question, question_type)
-                    print(f"✅ Answer: {final_answer}")
-                    return final_answer
-                except requests.exceptions.RequestException as e:
-                    if attempt == max_retries - 1:
-                        return f"Request failed after {max_retries} attempts: {str(e)}"
-                    print(f"⚠️ Request failed (attempt {attempt + 1}), retrying...")
-                    time.sleep(3)
-        except Exception as e:
-            error_msg = f"Processing error: {str(e)}"
-            print(f"❌ {error_msg}")
-            return error_msg
-def check_environment():
-    """Check if environment is properly configured"""
-    issues = []
-    if not HF_TOKEN:
-        issues.append("❌ HF_TOKEN not found in environment variables")
-    else:
-        issues.append("✅ HF_TOKEN found")
-    if not SPACE_ID:
-        issues.append("❌ SPACE_ID not configured")
     else:
-        issues.append(f"✅ SPACE_ID: {SPACE_ID}")
-    return "\n".join(issues)
-def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """Main function to run agent on all questions and submit results"""
-    if not profile:
-        return "❌ Please log in with your Hugging Face account first.", None
-    # Check environment
-    env_status = check_environment()
-    if "❌" in env_status:
-        return f"Environment check failed:\n{env_status}", None
-    username = profile.username or "anonymous"
-    agent_code = f"https://huggingface.co/spaces/{SPACE_ID}/tree/main"
-    print(f"🚀 Starting GAIA evaluation for user: {username}")
-    print(f"🔧 Environment status:\n{env_status}")
-    # Initialize the agent
-    agent = GAIAAgent()
-    # Fetch questions from GAIA API
     try:
-        print("📥 Fetching questions from GAIA API...")
-        questions_response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
-        questions_response.raise_for_status()
-        questions = questions_response.json()
-        print(f"✅ Retrieved {len(questions)} questions")
     except Exception as e:
-        error_msg = f"❌ Failed to fetch questions: {str(e)}"
-        print(error_msg)
-        return error_msg, None
-    # Process each question
-    answers = []
-    log_entries = []
-    successful_answers = 0
-    for i, q in enumerate(questions, 1):
-        print(f"\n{'='*60}")
-        print(f"🔄 Question {i}/{len(questions)}")
-        print(f"Task ID: {q.get('task_id', 'Unknown')}")
-        print(f"Question: {q['question']}")
         try:
-            # Get answer from agent
-            answer = agent(q["question"])
-            if not answer.startswith(("Error:", "Authentication error", "API authentication failed")):
-                successful_answers += 1
-                status = "✅ Success"
-            else:
-                status = "❌ Failed"
         except Exception as e:
-            answer = f"Error: {str(e)}"
-            status = "❌ Exception"
-            print(f"❌ Exception processing question: {e}")
-        # Prepare submission format
-        answers.append({
-            "task_id": q["task_id"],
-            "submitted_answer": str(answer)
-        })
-        # Log for display
-        log_entries.append({
-            "Task ID": q["task_id"],
-            "Question": q["question"][:80] + "..." if len(q["question"]) > 80 else q["question"],
-            "Answer": str(answer)[:60] + "..." if len(str(answer)) > 60 else str(answer),
-            "Status": status
-        })
-        print(f"Answer: {answer}")
-        print(f"Status: {status}")
-    print(f"\n📊 Processing complete: {successful_answers}/{len(questions)} successful")
-    # Submit answers to GAIA scoring API
     try:
-        print(f"\n📤 Submitting {len(answers)} answers to GAIA API...")
-        submission_data = {
-            "username": username,
-            "agent_code": agent_code,
-            "answers": answers
-        }
-        submit_response = requests.post(
-            f"{DEFAULT_API_URL}/submit",
-            json=submission_data,
-            timeout=60
         )
-        submit_response.raise_for_status()
-        result = submit_response.json()
-        print(f"✅ Submission successful!")
     except Exception as e:
-        error_msg = f"❌ Submission failed: {str(e)}"
-        print(error_msg)
-        return error_msg, pd.DataFrame(log_entries)
-    # Format results
-    score = result.get('score', 'N/A')
-    correct_count = result.get('correct_count', 'N/A')
-    total_attempted = result.get('total_attempted', 'N/A')
-    message = result.get('message', 'No additional message')
-    success_message = f"""✅ **GAIA Evaluation Complete!**
-**📊 Results:**
-- **Score:** {score}%
-- **Correct Answers:** {correct_count}/{total_attempted}
-- **Questions Processed:** {len(questions)}
-- **Successful API Calls:** {successful_answers}/{len(questions)}
-**🎯 Target Progress:** {"✅ TARGET ACHIEVED!" if isinstance(score, (int, float)) and score >= 30.0 else f"Need {30.0 - (score if isinstance(score, (int, float)) else 0):.1f}% more to reach 30%"}
-**📝 System Message:** {message}
-**💡 Tips for improvement:**
-- Ensure HF_TOKEN has proper permissions
-- Try running again if API calls failed
-- Check question types that performed poorly
-"""
-    print(success_message)
-    return success_message, pd.DataFrame(log_entries)
-# Create Gradio Interface
-def create_interface():
-    """Create the Gradio interface"""
-    with gr.Blocks(
-        title="🎯 GAIA Challenge Agent",
-        theme=gr.themes.Soft(),
-        css="""
-        .status-box {
-            background: #f8f9fa;
-            border-left: 4px solid #007bff;
-            padding: 15px;
-        }
-        """
-    ) as demo:
-        gr.Markdown("""
-        # 🎯 GAIA Challenge Agent
-        **Goal:** Achieve 30% accuracy on the GAIA benchmark
-        This agent uses Google's FLAN-T5-Large model with specialized question processing to tackle GAIA's challenging questions.
-        **Setup Required:**
-        1. Set `HF_TOKEN` in your Space secrets (Settings → Repository secrets)
-        2. Set `SPACE_ID` to your space name (e.g., "username/space-name")
-        """)
-        # Environment check
-        with gr.Accordion("🔧 Environment Check", open=False):
-            env_check = gr.Textbox(
-                value=check_environment(),
-                label="Environment Status",
-                lines=3,
-                interactive=False
-            )
-        # Authentication
-        gr.Markdown("### 🔐 Authentication")
-        gr.LoginButton(value="🔑 Login with Hugging Face")
-        # Main controls
-        gr.Markdown("### 🚀 Run Evaluation")
-        run_button = gr.Button(
-            "🎯 Start GAIA Evaluation",
-            variant="primary",
-            size="lg"
-        )
-        # Results
-        gr.Markdown("### 📊 Results")
-        with gr.Row():
-            status_output = gr.Textbox(
-                label="📋 Evaluation Results",
-                lines=12,
-                max_lines=20,
-                placeholder="Click 'Start GAIA Evaluation' to begin...",
-                elem_classes=["status-box"]
-            )
-        gr.Markdown("### 📝 Question Processing Log")
-        results_table = gr.DataFrame(
-            label="Detailed Processing Results",
-            headers=["Task ID", "Question", "Answer", "Status"],
-            wrap=True,
-            max_height=400
-        )
-        # Event handlers
-        run_button.click(
-            fn=run_and_submit_all,
-            outputs=[status_output, results_table],
-            show_progress=True
-        )
-        # Footer
-        gr.Markdown("""
         ---
-        **🔍 Troubleshooting:**
-        - **401 Error:** Check that HF_TOKEN is valid and set in Space secrets
-        - **503 Error:** Model is loading, wait and try again
-        - **0% Score:** Check answer format and question processing logic
-        **📚 Model:** google/flan-t5-large (instruction-tuned for better reasoning)
-        """)
-    return demo
-# Launch the app
 if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False
-    )

 import os
 import gradio as gr
 import requests
+import inspect
 import pandas as pd
+# (Keep Constants as is)
+# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+class BasicAgent:
+    def __init__(self):
+        print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        fixed_answer = "This is a default answer."
+        print(f"Agent returning fixed answer: {fixed_answer}")
+        return fixed_answer
+def run_and_submit_all( profile: gr.OAuthProfile | None):
+    """
+    Fetches all questions, runs the BasicAgent on them, submits all answers,
+    and displays the results.
+    """
+    # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    if profile:
+        username= f"{profile.username}"
+        print(f"User logged in: {username}")
     else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent (modify this part to create your agent)
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public).
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
     try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run your Agent
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            continue
         try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+    if not answers_payload:
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+    # 5. Submit
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
         )
+        print("Submission successful.")
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except requests.exceptions.HTTPError as e:
+        error_detail = f"Server responded with status {e.response.status_code}."
+        try:
+            error_json = e.response.json()
+            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
     except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+# --- Build Gradio Interface using Blocks ---
+with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
+        **Disclaimers:**
+        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+        """
+    )
+    gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # Removed max_rows=10 from DataFrame constructor
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
+    )
 if __name__ == "__main__":
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+    else:
+        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup: # Print repo URLs if SPACE_ID is found
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)