Final_Assignment_Template

Sleeping

App Files Files Community

sirine1712 committed on Jun 17

Commit

a3a13ef

verified ·

1 Parent(s): 4a0cb0a

Update app.py

Browse files

Files changed (1) hide show

app.py +293 -126

app.py CHANGED Viewed

@@ -4,84 +4,172 @@ import requests
 import pandas as pd
 import json
 import time
 from typing import Dict, List, Any, Optional
 # Config
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MODEL_NAME = "microsoft/DialoGPT-medium"  # Better conversational model
 SPACE_ID = os.getenv("SPACE_ID", "sirine1712/Final_Assignment_Template")
 HF_TOKEN = os.getenv("HF_TOKEN")
-class HuggingFaceAPIAgent:
-    """Enhanced Hugging Face Inference Agent with better question processing"""
     def __init__(self, model: str = MODEL_NAME):
         self.model = model
         self.api_url = f"https://api-inference.huggingface.co/models/{model}"
-        self.headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-    def preprocess_question(self, question: str) -> str:
-        """Preprocess question to improve model understanding"""
-        # Add context markers for better comprehension
-        processed = f"Question: {question.strip()}"
-        # Handle specific question types
-        if any(word in question.lower() for word in ['calculate', 'compute', 'math', 'number']):
-            processed = f"Math problem: {question.strip()} Please provide the numerical answer."
-        elif any(word in question.lower() for word in ['when', 'what year', 'date']):
-            processed = f"Factual question about time: {question.strip()} Please provide the specific date or year."
-        elif any(word in question.lower() for word in ['who', 'person', 'people']):
-            processed = f"Question about people: {question.strip()} Please provide the name(s)."
-        elif any(word in question.lower() for word in ['where', 'location', 'place']):
-            processed = f"Location question: {question.strip()} Please provide the specific location."
-        elif any(word in question.lower() for word in ['how many', 'count', 'quantity']):
-            processed = f"Counting question: {question.strip()} Please provide the exact number."
-        return processed
-    def postprocess_answer(self, raw_answer: str, question: str) -> str:
-        """Clean and format the model's response"""
-        if not raw_answer:
             return "Unable to generate answer"
-        # Remove common prefixes/suffixes
-        answer = raw_answer.strip()
-        prefixes_to_remove = [
-            "Question:", "Answer:", "Response:", "Output:",
-            "The answer is:", "Based on the question:",
-            "Math problem:", "Factual question about time:",
-            "Question about people:", "Location question:",
-            "Counting question:"
-        ]
-        for prefix in prefixes_to_remove:
-            if answer.lower().startswith(prefix.lower()):
-                answer = answer[len(prefix):].strip()
-        # Extract specific answer patterns
-        if any(word in question.lower() for word in ['calculate', 'compute', 'math']):
-            # Try to extract numbers from the response
-            import re
-            numbers = re.findall(r'-?\d+\.?\d*', answer)
             if numbers:
-                return numbers[-1]  # Return the last number found
-        # Limit answer length for conciseness
-        if len(answer) > 200:
-            sentences = answer.split('.')
-            answer = sentences[0] + '.' if sentences else answer[:200]
-        return answer
     def __call__(self, question: str) -> str:
         """Main method to process questions"""
-        print(f"⏳ Processing question: {question[:80]}...")
         try:
-            # Preprocess the question
-            processed_question = self.preprocess_question(question)
-            # Make API call with retry logic
             max_retries = 3
             for attempt in range(max_retries):
                 try:
@@ -89,66 +177,101 @@ class HuggingFaceAPIAgent:
                         self.api_url,
                         headers=self.headers,
                         json={
-                            "inputs": processed_question,
                             "parameters": {
-                                "max_length": 150,
-                                "temperature": 0.3,  # Lower temperature for more focused answers
-                                "do_sample": True,
-                                "top_p": 0.9
                             }
                         },
-                        timeout=15
                     )
-                    if response.status_code == 503:  # Model loading
-                        print(f"⏳ Model loading, waiting... (attempt {attempt + 1})")
-                        time.sleep(10)
                         continue
                     response.raise_for_status()
-                    output = response.json()
-                    # Extract generated text
-                    if isinstance(output, list) and len(output) > 0:
-                        raw_answer = output[0].get("generated_text", "")
-                    elif isinstance(output, dict):
-                        raw_answer = output.get("generated_text", "")
                     else:
-                        raw_answer = str(output)
-                    # Postprocess the answer
-                    final_answer = self.postprocess_answer(raw_answer, question)
-                    print(f"✅ Generated answer: {final_answer[:60]}...")
                     return final_answer
                 except requests.exceptions.RequestException as e:
                     if attempt == max_retries - 1:
-                        raise e
                     print(f"⚠️ Request failed (attempt {attempt + 1}), retrying...")
-                    time.sleep(2)
         except Exception as e:
-            error_msg = f"Error processing question: {str(e)}"
             print(f"❌ {error_msg}")
             return error_msg
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """Main function to run agent on all questions and submit results"""
     if not profile:
         return "❌ Please log in with your Hugging Face account first.", None
     username = profile.username or "anonymous"
     agent_code = f"https://huggingface.co/spaces/{SPACE_ID}/tree/main"
-    print(f"🚀 Starting agent run for user: {username}")
     # Initialize the agent
-    agent = HuggingFaceAPIAgent()
     # Fetch questions from GAIA API
     try:
         print("📥 Fetching questions from GAIA API...")
-        questions_response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
         questions_response.raise_for_status()
         questions = questions_response.json()
         print(f"✅ Retrieved {len(questions)} questions")
@@ -160,31 +283,47 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     # Process each question
     answers = []
     log_entries = []
     for i, q in enumerate(questions, 1):
-        print(f"\n🔄 Processing question {i}/{len(questions)}")
         print(f"Task ID: {q.get('task_id', 'Unknown')}")
         try:
             # Get answer from agent
             answer = agent(q["question"])
         except Exception as e:
             answer = f"Error: {str(e)}"
-            print(f"❌ Error processing question: {e}")
         # Prepare submission format
         answers.append({
             "task_id": q["task_id"],
-            "submitted_answer": answer
         })
         # Log for display
         log_entries.append({
             "Task ID": q["task_id"],
-            "Question": q["question"][:100] + "..." if len(q["question"]) > 100 else q["question"],
-            "Submitted Answer": answer[:100] + "..." if len(str(answer)) > 100 else str(answer),
-            "Status": "✅ Completed" if "Error:" not in str(answer) else "❌ Failed"
         })
     # Submit answers to GAIA scoring API
     try:
@@ -198,35 +337,40 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         submit_response = requests.post(
             f"{DEFAULT_API_URL}/submit",
             json=submission_data,
-            timeout=30
         )
         submit_response.raise_for_status()
         result = submit_response.json()
         print(f"✅ Submission successful!")
-        print(f"Score: {result.get('score', 'N/A')}%")
     except Exception as e:
         error_msg = f"❌ Submission failed: {str(e)}"
         print(error_msg)
         return error_msg, pd.DataFrame(log_entries)
-    # Format success message
     score = result.get('score', 'N/A')
     correct_count = result.get('correct_count', 'N/A')
     total_attempted = result.get('total_attempted', 'N/A')
     message = result.get('message', 'No additional message')
-    success_message = f"""✅ **Submission Complete!**
 **📊 Results:**
 - **Score:** {score}%
 - **Correct Answers:** {correct_count}/{total_attempted}
-- **Total Questions:** {len(questions)}
-**📝 Message:** {message}
-**🎯 Target:** 30% ({"✅ ACHIEVED!" if isinstance(score, (int, float)) and score >= 30 else "Keep trying!"})
 """
     print(success_message)
@@ -236,62 +380,85 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 def create_interface():
     """Create the Gradio interface"""
     with gr.Blocks(
-        title="🤖 GAIA Challenge Agent",
-        theme=gr.themes.Soft()
     ) as demo:
         gr.Markdown("""
-        # 🤖 GAIA Challenge Agent
-        An AI agent built to tackle the GAIA benchmark questions using Hugging Face models.
-        **Target:** Achieve 30% accuracy on GAIA evaluation questions.
-        **Instructions:**
-        1. Log in with your Hugging Face account
-        2. Click "🚀 Run Agent & Submit" to start the evaluation
-        3. Wait for the agent to process all questions and submit results
         """)
-        # Login section
         gr.Markdown("### 🔐 Authentication")
-        gr.LoginButton(value="Login with Hugging Face")
-        # Control section
-        gr.Markdown("### 🎮 Controls")
-        with gr.Row():
-            run_button = gr.Button(
-                "🚀 Run Agent & Submit",
-                variant="primary",
-                size="lg"
-            )
-        # Results section
         gr.Markdown("### 📊 Results")
-        status_output = gr.Textbox(
-            label="📋 Status & Results",
-            lines=8,
-            max_lines=15,
-            placeholder="Results will appear here after submission..."
-        )
-        gr.Markdown("### 📝 Detailed Log")
         results_table = gr.DataFrame(
-            label="Agent Processing Log",
-            headers=["Task ID", "Question", "Submitted Answer", "Status"],
-            wrap=True
         )
         # Event handlers
         run_button.click(
             fn=run_and_submit_all,
-            outputs=[status_output, results_table]
         )
         # Footer
         gr.Markdown("""
         ---
-        **Note:** Make sure your `HF_TOKEN` is set in the Space secrets for API access.
         """)
     return demo

 import pandas as pd
 import json
 import time
+import re
 from typing import Dict, List, Any, Optional
 # Config
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+MODEL_NAME = "google/flan-t5-large"  # Free model that works well
 SPACE_ID = os.getenv("SPACE_ID", "sirine1712/Final_Assignment_Template")
 HF_TOKEN = os.getenv("HF_TOKEN")
+class GAIAAgent:
+    """Specialized agent for GAIA benchmark questions with proper auth handling"""
     def __init__(self, model: str = MODEL_NAME):
+        """Store the model id, its inference endpoint URL and the request headers.
+
+        Args:
+            model: Hugging Face model id interpolated into the
+                api-inference endpoint URL. Defaults to ``MODEL_NAME``.
+        """
         self.model = model
         self.api_url = f"https://api-inference.huggingface.co/models/{model}"
+        self.headers = self._get_headers()
+    def _get_headers(self) -> dict:
+        """Build the HTTP headers sent with every inference request.
+
+        Returns:
+            A dict with a JSON ``Content-Type`` header, preceded by a bearer
+            ``Authorization`` header when the module-level ``HF_TOKEN`` is
+            set. A warning is printed when the token is missing.
+        """
+        request_headers = {}
+        if HF_TOKEN:
+            request_headers["Authorization"] = f"Bearer {HF_TOKEN}"
+        else:
+            print("⚠️ WARNING: HF_TOKEN not found in environment variables")
+        request_headers["Content-Type"] = "application/json"
+        return request_headers
+    def _test_api_access(self) -> bool:
+        """Probe the inference endpoint with a small test request.
+
+        Returns:
+            ``False`` when the request raises or the API rejects the
+            credentials (HTTP 401 or 403); ``True`` otherwise, including
+            while the model is still loading (HTTP 503).
+        """
+        try:
+            test_response = requests.post(
+                self.api_url,
+                headers=self.headers,
+                json={"inputs": "Test connection"},
+                timeout=10
+            )
+        except Exception as e:
+            # Best-effort probe: any failure to reach the API counts as "no access".
+            print(f"❌ API test failed: {e}")
+            return False
+        status = test_response.status_code
+        if status in (401, 403):
+            # 403 is also a credential problem (token lacks permission).
+            print("❌ Authentication failed - check HF_TOKEN")
+            return False
+        if status == 503:
+            print("⏳ Model is loading...")
+        elif test_response.ok:
+            print("✅ API access confirmed")
+        else:
+            # Not an authentication problem, so do not block the caller, but
+            # do not report the endpoint as confirmed either.
+            print(f"⚠️ API test returned HTTP {status}")
+        return True
+    def classify_question_type(self, question: str) -> str:
+        """Assign a coarse category to a question by keyword matching.
+
+        Categories are tried in the order mathematical, factual, counting,
+        temporal; the first one with a hit wins. Keywords are matched as
+        substrings of the lower-cased question.
+
+        Args:
+            question: The question text.
+
+        Returns:
+            One of ``"mathematical"``, ``"factual"``, ``"counting"``,
+            ``"temporal"`` or ``"general"`` (no keyword matched).
+        """
+        question_lower = question.lower()
+        # Mathematical/computational questions
+        math_keywords = (
+            'calculate', 'compute', 'sum', 'multiply', 'divide', 'subtract',
+            'average', 'mean', 'percentage', 'ratio', 'equation', 'formula',
+            'math', 'arithmetic', 'algebra', '+', '*', '='
+        )
+        # '-' and '/' also occur in ordinary prose ("well-known", "and/or"),
+        # so they only count as operators when they sit between two digits.
+        numeric_operator = re.search(r'\d\s*[-/]\s*\d', question_lower)
+        if numeric_operator or any(word in question_lower for word in math_keywords):
+            return "mathematical"
+        # Factual/knowledge questions
+        if any(word in question_lower for word in (
+            'who is', 'what is', 'when was', 'where is', 'which',
+            'born', 'died', 'founded', 'invented', 'discovered',
+            'capital', 'president', 'author', 'wrote', 'directed'
+        )):
+            return "factual"
+        # Counting/quantitative questions
+        if any(word in question_lower for word in (
+            'how many', 'count', 'number of', 'total', 'quantity'
+        )):
+            return "counting"
+        # Date/time questions
+        if any(word in question_lower for word in (
+            'year', 'date', 'century', 'decade', 'month', 'day',
+            'age', 'old', 'recent', 'latest', 'first time', 'last time'
+        )):
+            return "temporal"
+        return "general"
+    def format_prompt_by_type(self, question: str, question_type: str) -> str:
+        """Prefix the question with the task keyword for its category.
+
+        Args:
+            question: The question text, inserted unchanged.
+            question_type: Category label; unknown labels get the generic
+                ``answer`` prefix.
+
+        Returns:
+            The prompt in the form ``"<prefix>: <question>"``.
+        """
+        task_prefixes = {
+            "mathematical": "solve",
+            "factual": "question",
+            "counting": "count",
+            "temporal": "when",
+        }
+        prefix = task_prefixes.get(question_type, "answer")
+        return f"{prefix}: {question}"
+    def extract_clean_answer(self, raw_response: str, question: str, question_type: str) -> str:
+        """Reduce a raw model response to a short answer string.
+
+        Args:
+            raw_response: Text generated by the model.
+            question: The original question (not used by the extraction).
+            question_type: Category label; selects which pattern, if any,
+                is pulled out of the response.
+
+        Returns:
+            ``"Unable to generate answer"`` for an empty or blank response.
+            Otherwise the number, year or date the category asks for when
+            one is present, else the first sentence (or the whole response),
+            capped at 100 characters plus ``"..."``.
+        """
+        if not raw_response or not raw_response.strip():
             return "Unable to generate answer"
+        # T5 output is usually already clean; only strip a leading label.
+        response = re.sub(
+            r'^(answer:|solution:|result:)\s*', '', raw_response.strip(),
+            flags=re.IGNORECASE
+        )
+        # Extract specific patterns based on question type
+        if question_type == "mathematical":
+            # The fractional part is optional but must contain digits, so a
+            # sentence-ending period ("... is 42.") is not glued to the number.
+            numbers = re.findall(r'-?\d+(?:\.\d+)?', response)
+            if numbers:
+                return numbers[-1]  # the last number is taken as the result
+        elif question_type == "counting":
+            numbers = re.findall(r'\d+', response)
             if numbers:
+                return numbers[0]  # the first integer is taken as the count
+        elif question_type == "temporal":
+            # Non-capturing group: with a capturing group re.findall returns
+            # only the group ("19"/"20") instead of the full four-digit year.
+            years = re.findall(r'\b(?:19|20)\d{2}\b', response)
+            if years:
+                return years[0]
+            dates = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b', response)
+            if dates:
+                return dates[0]
+        # No pattern matched: use the first sentence when it is longer than
+        # five characters, capped at 100 characters.
+        first_sentence = response.split('.')[0]
+        if len(first_sentence) > 5:
+            clean_answer = first_sentence.strip()
+            if len(clean_answer) > 100:
+                clean_answer = clean_answer[:100] + "..."
+            return clean_answer
+        # Fallback: return first 100 characters
+        return response[:100] + "..." if len(response) > 100 else response
     def __call__(self, question: str) -> str:
         """Main method to process questions"""
+        print(f"🔍 Processing: {question[:60]}...")
+        # Check API access first
+        if not self._test_api_access():
+            return "API authentication failed - check HF_TOKEN"
         try:
+            # Classify and format the question
+            question_type = self.classify_question_type(question)
+            formatted_prompt = self.format_prompt_by_type(question, question_type)
+            print(f"📝 Question type: {question_type}")
+            # Make API call with retries
             max_retries = 3
             for attempt in range(max_retries):
                 try:
                         self.api_url,
                         headers=self.headers,
                         json={
+                            "inputs": formatted_prompt,
                             "parameters": {
+                                "max_new_tokens": 100,
+                                "temperature": 0.1,  # Very low temperature for precise answers
+                                "do_sample": False,  # Deterministic output
+                                "return_full_text": False
                             }
                         },
+                        timeout=20
                     )
+                    if response.status_code == 401:
+                        return "Authentication error - invalid HF_TOKEN"
+                    elif response.status_code == 503:  # Model loading
+                        wait_time = 15 + (attempt * 10)
+                        print(f"⏳ Model loading, waiting {wait_time}s... (attempt {attempt + 1})")
+                        time.sleep(wait_time)
+                        continue
+                    elif response.status_code == 429:  # Rate limit
+                        wait_time = 5 + (attempt * 5)
+                        print(f"⏳ Rate limited, waiting {wait_time}s...")
+                        time.sleep(wait_time)
                         continue
                     response.raise_for_status()
+                    result = response.json()
+                    # Extract the generated text
+                    if isinstance(result, list) and len(result) > 0:
+                        if 'generated_text' in result[0]:
+                            raw_answer = result[0]['generated_text']
+                        else:
+                            raw_answer = str(result[0])
+                    elif isinstance(result, dict):
+                        raw_answer = result.get('generated_text', str(result))
                     else:
+                        raw_answer = str(result)
+                    # Clean and extract the final answer
+                    final_answer = self.extract_clean_answer(raw_answer, question, question_type)
+                    print(f"✅ Answer: {final_answer}")
                     return final_answer
                 except requests.exceptions.RequestException as e:
                     if attempt == max_retries - 1:
+                        return f"Request failed after {max_retries} attempts: {str(e)}"
                     print(f"⚠️ Request failed (attempt {attempt + 1}), retrying...")
+                    time.sleep(3)
         except Exception as e:
+            error_msg = f"Processing error: {str(e)}"
             print(f"❌ {error_msg}")
             return error_msg
+def check_environment():
+    """Report whether the required configuration values are present.
+
+    Returns:
+        Two newline-separated status lines, one for ``HF_TOKEN`` and one for
+        ``SPACE_ID``, each starting with ✅ (set) or ❌ (missing).
+    """
+    token_line = (
+        "✅ HF_TOKEN found"
+        if HF_TOKEN
+        else "❌ HF_TOKEN not found in environment variables"
+    )
+    space_line = (
+        f"✅ SPACE_ID: {SPACE_ID}"
+        if SPACE_ID
+        else "❌ SPACE_ID not configured"
+    )
+    return "\n".join((token_line, space_line))
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """Main function to run agent on all questions and submit results"""
     if not profile:
         return "❌ Please log in with your Hugging Face account first.", None
+    # Check environment
+    env_status = check_environment()
+    if "❌" in env_status:
+        return f"Environment check failed:\n{env_status}", None
     username = profile.username or "anonymous"
     agent_code = f"https://huggingface.co/spaces/{SPACE_ID}/tree/main"
+    print(f"🚀 Starting GAIA evaluation for user: {username}")
+    print(f"🔧 Environment status:\n{env_status}")
     # Initialize the agent
+    agent = GAIAAgent()
     # Fetch questions from GAIA API
     try:
         print("📥 Fetching questions from GAIA API...")
+        questions_response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
         questions_response.raise_for_status()
         questions = questions_response.json()
         print(f"✅ Retrieved {len(questions)} questions")
     # Process each question
     answers = []
     log_entries = []
+    successful_answers = 0
     for i, q in enumerate(questions, 1):
+        print(f"\n{'='*60}")
+        print(f"🔄 Question {i}/{len(questions)}")
         print(f"Task ID: {q.get('task_id', 'Unknown')}")
+        print(f"Question: {q['question']}")
         try:
             # Get answer from agent
             answer = agent(q["question"])
+            if not answer.startswith(("Error:", "Authentication error", "API authentication failed")):
+                successful_answers += 1
+                status = "✅ Success"
+            else:
+                status = "❌ Failed"
         except Exception as e:
             answer = f"Error: {str(e)}"
+            status = "❌ Exception"
+            print(f"❌ Exception processing question: {e}")
         # Prepare submission format
         answers.append({
             "task_id": q["task_id"],
+            "submitted_answer": str(answer)
         })
         # Log for display
         log_entries.append({
             "Task ID": q["task_id"],
+            "Question": q["question"][:80] + "..." if len(q["question"]) > 80 else q["question"],
+            "Answer": str(answer)[:60] + "..." if len(str(answer)) > 60 else str(answer),
+            "Status": status
         })
+        print(f"Answer: {answer}")
+        print(f"Status: {status}")
+    print(f"\n📊 Processing complete: {successful_answers}/{len(questions)} successful")
     # Submit answers to GAIA scoring API
     try:
         submit_response = requests.post(
             f"{DEFAULT_API_URL}/submit",
             json=submission_data,
+            timeout=60
         )
         submit_response.raise_for_status()
         result = submit_response.json()
         print(f"✅ Submission successful!")
     except Exception as e:
         error_msg = f"❌ Submission failed: {str(e)}"
         print(error_msg)
         return error_msg, pd.DataFrame(log_entries)
+    # Format results
     score = result.get('score', 'N/A')
     correct_count = result.get('correct_count', 'N/A')
     total_attempted = result.get('total_attempted', 'N/A')
     message = result.get('message', 'No additional message')
+    success_message = f"""✅ **GAIA Evaluation Complete!**
 **📊 Results:**
 - **Score:** {score}%
 - **Correct Answers:** {correct_count}/{total_attempted}
+- **Questions Processed:** {len(questions)}
+- **Successful API Calls:** {successful_answers}/{len(questions)}
+**🎯 Target Progress:** {"✅ TARGET ACHIEVED!" if isinstance(score, (int, float)) and score >= 30.0 else f"Need {30.0 - (score if isinstance(score, (int, float)) else 0):.1f}% more to reach 30%"}
+**📝 System Message:** {message}
+**💡 Tips for improvement:**
+- Ensure HF_TOKEN has proper permissions
+- Try running again if API calls failed
+- Check question types that performed poorly
 """
     print(success_message)
 def create_interface():
+    """Build the Gradio Blocks UI for running the evaluation.
+
+    The layout, top to bottom: an introduction, a collapsed accordion with
+    the environment status, a login button, the run button, a results
+    textbox and a table for the per-question log. Clicking the run button
+    calls ``run_and_submit_all`` and writes its two return values to the
+    textbox and the table.
+
+    Returns:
+        The constructed ``gr.Blocks`` object.
+    """
     with gr.Blocks(
+        title="🎯 GAIA Challenge Agent",
+        theme=gr.themes.Soft(),
+        css="""
+        .status-box {
+            background: #f8f9fa;
+            border-left: 4px solid #007bff;
+            padding: 15px;
+        }
+        """
     ) as demo:
         gr.Markdown("""
+        # 🎯 GAIA Challenge Agent
+        **Goal:** Achieve 30% accuracy on the GAIA benchmark
+        This agent uses Google's FLAN-T5-Large model with specialized question processing to tackle GAIA's challenging questions.
+        **Setup Required:**
+        1. Set `HF_TOKEN` in your Space secrets (Settings → Repository secrets)
+        2. Set `SPACE_ID` to your space name (e.g., "username/space-name")
         """)
+        # Environment check: check_environment() is evaluated once, while the
+        # interface is being built, so the textbox shows the status at build
+        # time. env_check is not referenced again in this function.
+        with gr.Accordion("🔧 Environment Check", open=False):
+            env_check = gr.Textbox(
+                value=check_environment(),
+                label="Environment Status",
+                lines=3,
+                interactive=False
+            )
+        # Authentication
         gr.Markdown("### 🔐 Authentication")
+        gr.LoginButton(value="🔑 Login with Hugging Face")
+        # Main controls
+        gr.Markdown("### 🚀 Run Evaluation")
+        run_button = gr.Button(
+            "🎯 Start GAIA Evaluation",
+            variant="primary",
+            size="lg"
+        )
+        # Results: the textbox carries the "status-box" class styled by the
+        # css passed to gr.Blocks above.
         gr.Markdown("### 📊 Results")
+        with gr.Row():
+            status_output = gr.Textbox(
+                label="📋 Evaluation Results",
+                lines=12,
+                max_lines=20,
+                placeholder="Click 'Start GAIA Evaluation' to begin...",
+                elem_classes=["status-box"]
+            )
+        gr.Markdown("### 📝 Question Processing Log")
         results_table = gr.DataFrame(
+            label="Detailed Processing Results",
+            headers=["Task ID", "Question", "Answer", "Status"],
+            wrap=True,
+            max_height=400
         )
         # Event handlers
+        # No `inputs` are passed to the handler.
+        # NOTE(review): presumably Gradio supplies the gr.OAuthProfile
+        # argument from run_and_submit_all's type annotation -- confirm.
         run_button.click(
             fn=run_and_submit_all,
+            outputs=[status_output, results_table],
+            show_progress=True
         )
         # Footer
         gr.Markdown("""
         ---
+        **🔍 Troubleshooting:**
+        - **401 Error:** Check that HF_TOKEN is valid and set in Space secrets
+        - **503 Error:** Model is loading, wait and try again
+        - **0% Score:** Check answer format and question processing logic
+        **📚 Model:** google/flan-t5-large (instruction-tuned for better reasoning)
         """)
     return demo