Final_Assignment_Template

Sleeping

App Files Files Community

benjosaur committed on Jun 28

Commit

3869fd1

1 Parent(s): 81917a3

Complete Draft

Browse files

Files changed (5) hide show

.gitignore +3 -0
app.py +85 -32
search.py +68 -0
tools.py +192 -0
utils.py +52 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.env
+__pycache__
+*.pyc

app.py CHANGED Viewed

@@ -3,32 +3,63 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
-        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -55,16 +86,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -76,23 +107,42 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
@@ -162,20 +212,19 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +232,18 @@ if __name__ == "__main__":
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import requests
 import inspect
 import pandas as pd
+from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, AgentStream
+from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+from tools import APIProcessor, parse_youtube_video, transcribe_image_from_link
+from search import GoogleSearch
+from dotenv import load_dotenv
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+# Passed as system_prompt to the AgentWorkflow built in BasicAgent.__call__; it fixes the "FINAL ANSWER: ..." reply template.
+SYSTEM_PROMPT = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
 class BasicAgent:
+    """Question-answering agent built on a llama_index ``AgentWorkflow``.
+    Each call assembles a workflow with Google search tools, a YouTube
+    transcriber, an image transcriber and, when the question ships with a
+    file, an attachment reader.
+    """
     def __init__(self):
+        self.llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
         print("BasicAgent initialized.")
+    def __call__(self, question: str, task_id: str, file_name: str | None = None) -> str:
+        """Answer one question and return the agent's reply as text.
+        Args:
+            question: The question text.
+            task_id: Task identifier, used to build the attachment download URL.
+            file_name: Name of the attached file; empty or None when there is none.
+        Returns:
+            The workflow's final response converted to ``str``.
+        """
+        # Local import: asyncio.run starts its own event loop, so this method
+        # must be called from synchronous code.
+        import asyncio
+        searcher = GoogleSearch()
+        tools = [
+            searcher.google_search,
+            searcher.google_image_search,
+            parse_youtube_video,
+            transcribe_image_from_link,
+        ]
+        if file_name:
+            # Register the bound method itself. Calling it here would download
+            # the file eagerly and hand its output, not a callable, to the workflow.
+            attachment = APIProcessor(
+                file_url=DEFAULT_API_URL + "/files/" + task_id, file_name=file_name
+            )
+            tools.append(attachment.get_and_process_attachment)
+        agent = AgentWorkflow.from_tools_or_functions(
+            tools,
+            llm=self.llm,
+            system_prompt=SYSTEM_PROMPT,
+        )
+        async def _run() -> str:
+            # ``run`` returns a handler that has to be awaited inside a running loop.
+            return str(await agent.run(user_msg=question))
+        return asyncio.run(_run())
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
     if profile:
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(question_text, task_id, file_name)
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": submitted_answer,
+                }
+            )
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": f"AGENT ERROR: {e}",
+                }
+            )
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(
+            f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+        )
     else:
+        print(
+            "ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+        )
+    print("-" * (60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)

search.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from dotenv import load_dotenv
+import os
+import aiohttp
+class GoogleSearch:
+    """Small async client for the Google Custom Search JSON API.
+    Credentials come from the ``GOOGLE_API_KEY`` and ``GOOGLE_CSE_ID``
+    environment variables; a ``.env`` file is loaded first.
+    """
+    SEARCH_URL = "https://www.googleapis.com/customsearch/v1"
+    def __init__(self):
+        load_dotenv()
+        # getenv for both values, so a missing one surfaces as the explicit
+        # ValueError raised at search time instead of a bare KeyError here.
+        self.api_key = os.getenv("GOOGLE_API_KEY")
+        self.cse_id = os.getenv("GOOGLE_CSE_ID")
+    @staticmethod
+    def _format_results(header: str, items: list, include_snippet: bool) -> str:
+        """Render raw result items as the text block returned to the caller."""
+        if not items:
+            return header + "No results found."
+        entries = []
+        for item in items:
+            entry = f"Link:{item.get('link', '')}\nTitle:{item.get('title', '')}"
+            if include_snippet:
+                entry += f"\nSnippet:{item.get('snippet', '')}"
+            entries.append(entry)
+        return header + "\n\n".join(entries)
+    async def _search(self, query: str, num_results: int, image: bool) -> list:
+        """Query the API and return at most ``num_results`` raw result items."""
+        if not self.api_key or not self.cse_id:
+            raise ValueError(
+                "GOOGLE_API_KEY and GOOGLE_CSE_ID must be set in environment variables."
+            )
+        params = {"key": self.api_key, "cx": self.cse_id, "q": query}
+        if image:
+            # Without searchType the endpoint performs an ordinary web search.
+            params["searchType"] = "image"
+        async with aiohttp.ClientSession() as session:
+            async with session.get(self.SEARCH_URL, params=params) as response:
+                response.raise_for_status()
+                data = await response.json()
+        # The payload may carry no "items" key (e.g. a query without hits).
+        return data.get("items", [])[:num_results]
+    async def google_search(self, query: str, num_results: int = 5) -> str:
+        """Search the web with Google and return links, titles and snippets.
+        Args:
+            query: Search query
+            num_results: Max results to return
+        Returns:
+            Plain text listing one "Link / Title / Snippet" entry per result.
+        Raises:
+            ValueError: if the API key or search engine id is not configured.
+        """
+        items = await self._search(query, num_results, image=False)
+        return self._format_results("Web Search results:\n\n", items, True)
+    async def google_image_search(self, query: str, num_results: int = 5) -> str:
+        """Search Google Images and return image links with their titles.
+        Args:
+            query: Search query
+            num_results: Max results to return
+        Returns:
+            Plain text listing one "Link / Title" entry per image result.
+        Raises:
+            ValueError: if the API key or search engine id is not configured.
+        """
+        items = await self._search(query, num_results, image=True)
+        return self._format_results("Image Search results:\n\n", items, False)

tools.py ADDED Viewed

	@@ -0,0 +1,192 @@

+import requests
+from dotenv import load_dotenv
+from openai import OpenAI
+from utils import process_image_for_gpt
+import pandas as pd
+import tempfile
+import os
+import io
+import yt_dlp
+def transcribe_image_from_link(image_link: str) -> str:
+    """Transcribe the text visible in an image reachable by URL.
+    Args:
+        image_link (str): URL of the image to transcribe
+    Returns:
+        The transcription with surrounding whitespace removed, or
+        'No text found in image' when the model sends back no content.
+    """
+    # Load .env first, as the other OpenAI-backed helpers in this module do.
+    load_dotenv()
+    client = OpenAI()  # Uses OPENAI_API_KEY environment variable
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": """Please transcribe all text visible in this image.
+                        Extract the text exactly as it appears, maintaining formatting when possible.
+                        If there's no readable text, respond with 'No text found in image'.""",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": image_link,
+                            "detail": "high",
+                        },
+                    },
+                ],
+            }
+        ],
+        max_tokens=1000,
+        temperature=0,
+    )
+    # The message content can be None; guard before calling .strip().
+    content = response.choices[0].message.content
+    if not content:
+        return "No text found in image"
+    return content.strip()
+def parse_youtube_video(youtube_url: str) -> dict:
+    """Returns the title and text transcript of a youtube video
+    Args:
+        youtube_url: the full url linking to the video to transcribe
+    Returns:
+        dict with the keys "title" and "transcript".
+    Raises:
+        FileNotFoundError: if the download step produced no mp3 file.
+    """
+    load_dotenv()
+    client = OpenAI()
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Configure yt-dlp to extract audio into the temporary directory.
+        # The file is named by video id; the title is read from the metadata.
+        ydl_opts = {
+            "format": "bestaudio/best",
+            "postprocessors": [
+                {
+                    "key": "FFmpegExtractAudio",
+                    "preferredcodec": "mp3",
+                    "preferredquality": "192",
+                }
+            ],
+            "outtmpl": os.path.join(temp_dir, "%(id)s.%(ext)s"),
+        }
+        # Download audio
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(youtube_url, download=True)
+            title = info["title"]
+        # Find the downloaded audio file
+        audio_file = next(
+            (
+                os.path.join(temp_dir, name)
+                for name in os.listdir(temp_dir)
+                if name.endswith(".mp3")
+            ),
+            None,
+        )
+        if not audio_file:
+            raise FileNotFoundError("Audio file not found")
+        # Transcribe while the temporary directory still exists
+        with open(audio_file, "rb") as audio:
+            transcript = client.audio.transcriptions.create(
+                model="gpt-4o-transcribe", file=audio
+            )
+        return {"title": title, "transcript": transcript.text}
+class APIProcessor:
+    """Downloads a question's attachment and converts it to text.
+    mp3 files are transcribed, xlsx files are dumped sheet by sheet and png
+    files are transcribed by a vision model; any other file is returned as
+    decoded text.
+    """
+    def __init__(self, file_url: str, file_name: str):
+        load_dotenv()
+        self.file_url = file_url
+        self.file_name = file_name
+        self.client = OpenAI()
+    def _transcribe_mp3(self, response: requests.Response) -> str:
+        """Spool the audio to a temporary file and return its transcription."""
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
+            for chunk in response.iter_content(chunk_size=8192):
+                temp_file.write(chunk)
+            temp_file_path = temp_file.name
+        try:
+            with open(temp_file_path, "rb") as audio_file:
+                transcription = self.client.audio.transcriptions.create(
+                    model="gpt-4o-transcribe",
+                    file=audio_file,
+                )
+            return transcription.text
+        except Exception as e:
+            print(str(e))
+            # Report the failure as text, like the spreadsheet path does,
+            # instead of falling through and returning None.
+            return f"Error processing audio: {e}"
+        finally:
+            os.unlink(temp_file_path)
+    def _transcribe_image(self, response: requests.Response) -> str:
+        """Send the image to the vision model and return its description."""
+        base64_image = process_image_for_gpt(response.content)
+        TRANSCRIPTION_PROMPT = """Please in detail transcribe as much of the output information you can via text. Feel free to use ASCII."""
+        image_message = [
+            {"type": "text", "text": TRANSCRIPTION_PROMPT},
+            {
+                "type": "image_url",
+                "image_url": {
+                    # process_image_for_gpt re-encodes the picture as PNG.
+                    "url": f"data:image/png;base64,{base64_image}",
+                },
+            },
+        ]
+        completion = self.client.chat.completions.create(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": image_message}],
+            max_tokens=1000,
+        )
+        return completion.choices[0].message.content or ""
+    def _transcribe_spreadsheet(self, response: requests.Response) -> str:
+        """Return every sheet of the workbook rendered as text."""
+        try:
+            excel_file = pd.ExcelFile(io.BytesIO(response.content))
+            all_sheets_data = {
+                sheet: excel_file.parse(sheet_name=sheet).to_string()
+                for sheet in excel_file.sheet_names
+            }
+            return str(all_sheets_data)
+        except Exception as e:
+            return f"Error processing spreadsheet: {e}"
+    def get_and_process_attachment(self) -> str:
+        """For current question, download and process the file associated if it exists.
+        Returns:
+            Parsed text output of the attachment
+        Raises:
+            requests.HTTPError: if the download answers with an error status.
+        """
+        response = requests.get(self.file_url, timeout=15)
+        response.raise_for_status()
+        # Compare extensions case-insensitively; a missing name has no extension.
+        file_extension = (self.file_name or "").split(".")[-1].lower()
+        handlers = {
+            "mp3": self._transcribe_mp3,
+            "xlsx": self._transcribe_spreadsheet,
+            "png": self._transcribe_image,
+        }
+        handler = handlers.get(file_extension)
+        if handler is None:
+            # Unknown type: hand back decoded text rather than raw bytes.
+            return response.text
+        return handler(response)
+if __name__ == "__main__":
+    # Manual smoke test: running this module directly transcribes one YouTube
+    # video and prints the result. The commented-out lines below are an
+    # alternative check that processes an attachment from the scoring API.
+    # attempt to process file examples from API
+    # def get_file_api_url(task_id: str) -> str:
+    #     return "https://agents-course-unit4-scoring.hf.space" + "/files/" + task_id
+    # audio_task_processor = APIProcessor(
+    #     file_name="7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
+    #     file_url=get_file_api_url("7bd855d8-463d-4ed5-93ca-5fe35145f733"),
+    # )
+    # response = audio_task_processor.get_and_process_attachment()
+    # print(response)
+    result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
+    print(result)

utils.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from PIL import Image
+from io import BytesIO
+import base64
+def encode_image_in_base64(image: bytes):
+    """Return the base64 encoding of ``image`` as a UTF-8 decoded string."""
+    encoded = base64.b64encode(image)
+    return str(encoded, "utf-8")
+def replace_transparent_pixels(image_bytes: bytes):
+    """
+    Replaces fully transparent pixels of an image with opaque white pixels.
+    Args:
+    image_bytes: The encoded image data.
+    Returns:
+    PNG-encoded bytes of the modified image, or None if processing failed.
+    """
+    try:
+        img = Image.open(BytesIO(image_bytes)).convert("RGBA")
+        white = (255, 255, 255, 255)
+        # Only alpha == 0 is rewritten; partially transparent pixels are kept.
+        img.putdata([white if px[3] == 0 else px for px in img.getdata()])
+        buffer = BytesIO()
+        img.save(buffer, format="PNG")
+        return buffer.getvalue()
+    except FileNotFoundError:
+        print("Error: The file was not found.")
+        return None
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
+def process_image_for_gpt(image_bytes: bytes) -> str:
+    """Prepare an image for a vision request and return it base64-encoded.
+    Fully transparent pixels are replaced with white first. When that step
+    fails and returns None, the original bytes are encoded instead of
+    passing None on to the encoder.
+    """
+    cleaned = replace_transparent_pixels(image_bytes)
+    if cleaned is None:
+        cleaned = image_bytes
+    return encode_image_in_base64(cleaned)