Final_Assignment_Template

Sleeping

App Files Community

josondev committed on Jun 24

Commit

1a8d658

verified ·

1 Parent(s): 163f5c1

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -33

app.py CHANGED Viewed

@@ -2,43 +2,82 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
 from dotenv import load_dotenv
-from langchain_openai import ChatOpenAI
-from langchain_nvidia_ai_endpoints import ChatNVIDIA
 # Load environment variables
 load_dotenv()
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-class BasicAgent:
-    def __init__(self):
-        self.llm=ChatNVIDIA(model="meta/llama-3.3-70b-instruct",nvidia_api_key=os.getenv("NVIDIA_API_KEY"))
-        self.instructions = """You are a helpful assistant. For every question, reply with only the answer—no explanation, "
             "no units, and no extra words. If the answer is a number, just return the number. "
             "If it is a word or phrase, return only that. If it is a list, return a comma-separated list with no extra words. "
-            "Do not include any prefix, suffix, or explanation."""
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
         prompt = f"{self.instructions}\n\n{question}"
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        response = self.llm.invoke(prompt)
-        answer = response.content.strip() if hasattr(response, "content") else str(response)
-        print(f"Agent returning answer: {answer}")
-        return answer
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
-    """
-    space_id = os.getenv("SPACE_ID")  # For codebase link
     if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
@@ -50,9 +89,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -60,7 +98,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
-    # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
@@ -81,18 +118,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -103,12 +141,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
-    # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -163,7 +199,7 @@ with gr.Blocks() as demo:
         ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )

 import gradio as gr
 import requests
 import pandas as pd
+import base64
 from dotenv import load_dotenv
+from groq import Groq
 # Load environment variables
 load_dotenv()
+# --- Groq Multimodal Agent ---
+class GroqMultimodalAgent:
+    def __init__(self):
+        self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+        self.llava_model = "llava-v1.5-7b-4096-preview"  # For image Q&A
+        self.llama_model = "llama-3-70b-8192"            # For text Q&A
+        self.whisper_model = "whisper-large-v3"          # For audio transcription
+        self.instructions = (
+            "You are a helpful assistant. For every question or media, reply with only the answer—no explanation, "
             "no units, and no extra words. If the answer is a number, just return the number. "
             "If it is a word or phrase, return only that. If it is a list, return a comma-separated list with no extra words. "
+            "Do not include any prefix, suffix, or explanation."
+        )
+    def _encode_image(self, image_path):
+        with open(image_path, "rb") as img_file:
+            return base64.b64encode(img_file.read()).decode("utf-8")
+    def _process_image(self, image_path, question):
+        base64_image = self._encode_image(image_path)
+        prompt = f"{self.instructions}\n\n{question}"
+        chat_completion = self.client.chat.completions.create(
+            model=self.llava_model,
+            messages=[
+                {"role": "user", "content": [
+                    {"type": "text", "text": prompt},
+                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
+                ]}
+            ]
+        )
+        answer = chat_completion.choices[0].message.content.strip()
+        return self._extract_final_answer(answer)
+    def _process_audio(self, audio_path):
+        with open(audio_path, "rb") as audio_file:
+            transcript = self.client.audio.transcriptions.create(
+                model=self.whisper_model,
+                file=audio_file
+            )
+        return transcript.text.strip()
+    def _process_text(self, question):
         prompt = f"{self.instructions}\n\n{question}"
+        chat_completion = self.client.chat.completions.create(
+            model=self.llama_model,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        answer = chat_completion.choices[0].message.content.strip()
+        return self._extract_final_answer(answer)
+    def _extract_final_answer(self, llm_output: str) -> str:
+        for prefix in ["FINAL ANSWER:", "Final answer:", "final answer:"]:
+            if llm_output.lower().startswith(prefix.lower()):
+                return llm_output[len(prefix):].strip()
+        return llm_output
+    def __call__(self, question: str, image_path: str = None, audio_path: str = None) -> str:
+        if image_path:
+            return self._process_image(image_path, question)
+        elif audio_path:
+            return self._process_audio(audio_path)
+        else:
+            return self._process_text(question)
+# --- Gradio Leaderboard Submission App ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     try:
+        agent = GroqMultimodalAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        image_path = item.get("image_path", None)
+        audio_path = item.get("audio_path", None)
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(question_text, image_path=image_path, audio_path=audio_path)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
         """
     )