huytofu92 committed on
Commit
30ffa0e
·
1 Parent(s): 280d2e0

Add file support and youtube transcript 2nd choice

Browse files
Files changed (9) hide show
  1. .gitattributes +4 -0
  2. app.py +5 -0
  3. audio_tools.py +8 -2
  4. community_tools.py +15 -4
  5. load_data.py +8 -0
  6. mini_agents.py +4 -4
  7. requirements.txt +1 -0
  8. utils.py +71 -0
  9. vlm_tools.py +21 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
37
+ *.jpg filter=lfs diff=lfs merge=lfs -text
38
+ *.png filter=lfs diff=lfs merge=lfs -text
39
+ *.pptx filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  import requests
4
  import pandas as pd
5
  from mini_agents import master_agent
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -77,10 +78,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool =
77
  for item in questions_data:
78
  task_id = item.get("task_id")
79
  question_text = item.get("question")
 
80
  if not task_id or question_text is None:
81
  print(f"Skipping item with missing task_id or question: {item}")
82
  continue
83
  try:
 
 
 
84
  submitted_answer = agent(question_text)
85
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
86
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
3
  import requests
4
  import pandas as pd
5
  from mini_agents import master_agent
6
+ from utils import get_full_file_path
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
 
78
  for item in questions_data:
79
  task_id = item.get("task_id")
80
  question_text = item.get("question")
81
+ file_path = get_full_file_path(task_id)
82
  if not task_id or question_text is None:
83
  print(f"Skipping item with missing task_id or question: {item}")
84
  continue
85
  try:
86
+ question_text = question_text + "\n\nHere is the task_id for this question: " + task_id
87
+ if file_path:
88
+ question_text = question_text + f"\n\nHere is also the path to the file for the task (file name matches with task ID and is not in plain English): {file_path}"
89
  submitted_answer = agent(question_text)
90
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
91
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
audio_tools.py CHANGED
@@ -43,8 +43,14 @@ def audio_to_base64(file_path_or_key: str, state: dict) -> str:
43
  file_path = file_path_or_key
44
 
45
  # Load the audio file
46
- audio = AudioSegment.from_file(file_path)
47
-
 
 
 
 
 
 
48
  # Export the audio to a BytesIO object
49
  buffer = BytesIO()
50
  audio.export(buffer, format="wav") # You can change the format if needed
 
43
  file_path = file_path_or_key
44
 
45
  # Load the audio file
46
+ try:
47
+ audio = AudioSegment.from_file(file_path)
48
+ except Exception as e:
49
+ current_file_path = os.path.abspath(__file__)
50
+ current_file_dir = os.path.dirname(current_file_path)
51
+ file_path = os.path.join(current_file_dir, file_path.replace("Final_Assignment_Template", ""))
52
+ audio = AudioSegment.from_file(file_path)
53
+
54
  # Export the audio to a BytesIO object
55
  buffer = BytesIO()
56
  audio.export(buffer, format="wav") # You can change the format if needed
community_tools.py CHANGED
@@ -2,6 +2,7 @@ from langchain_google_community import GooglePlacesTool
2
  from langchain_community.agent_toolkits.load_tools import load_tools
3
  from langchain_community.document_loaders import YoutubeLoader
4
  from smolagents.tools import Tool, tool
 
5
 
6
  google_map_tool = Tool.from_langchain(GooglePlacesTool())
7
 
@@ -24,7 +25,17 @@ def get_youtube_transcript_from_url(video_url: str)->str:
24
  The transcript of the YouTube video as a string
25
  """
26
  video_id = video_url.split("=")[1]
27
- youtube_loader = YoutubeLoader(video_id=video_id)
28
- docs = youtube_loader.load()
29
- transcript = docs[0].page_content
30
- return transcript
 
 
 
 
 
 
 
 
 
 
 
2
  from langchain_community.agent_toolkits.load_tools import load_tools
3
  from langchain_community.document_loaders import YoutubeLoader
4
  from smolagents.tools import Tool, tool
5
+ from youtube_transcript_api import YouTubeTranscriptApi
6
 
7
  google_map_tool = Tool.from_langchain(GooglePlacesTool())
8
 
 
25
  The transcript of the YouTube video as a string
26
  """
27
  video_id = video_url.split("=")[1]
28
+ try:
29
+ ytt_api = YouTubeTranscriptApi()
30
+ fetched_transcript = ytt_api.fetch(video_id)
31
+
32
+ # is iterable
33
+ transcript = ""
34
+ for snippet in fetched_transcript:
35
+ transcript += f"{snippet['text']}\n"
36
+ return transcript
37
+ except Exception as e:
38
+ youtube_loader = YoutubeLoader(video_id=video_id)
39
+ docs = youtube_loader.load()
40
+ transcript = docs[0].page_content
41
+ return transcript
load_data.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import datasets
2
+
3
+ def download_dataset(dataset_name: str, name: str):
4
+ dataset = datasets.load_dataset(dataset_name, name, trust_remote_code=True)
5
+ return dataset
6
+
7
+ dataset = download_dataset("gaia-benchmark/GAIA", "2023_all")
8
+ dataset.save_to_disk("GAIA_2023_all")
mini_agents.py CHANGED
@@ -2,7 +2,7 @@ from smolagents import CodeAgent, InferenceClientModel
2
  from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
3
  from tools import tavily_search_tool, visit_webpage_tool
4
  from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
5
- from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video
6
  from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
7
  from community_tools import community_tools, get_youtube_transcript_from_url
8
  import os
@@ -53,10 +53,10 @@ vlm_model = InferenceClientModel(
53
 
54
  vlm_agent = CodeAgent(
55
  model=vlm_model,
56
- tools=[download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video],
57
  max_steps=6,
58
  # prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
59
- additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io"],
60
  name="vlm_agent",
61
  description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
62
  )
@@ -127,7 +127,7 @@ master_agent = CodeAgent(
127
  tools=[sort_list, get_youtube_transcript_from_url, *community_tools, tavily_search_tool, visit_webpage_tool],
128
  add_base_tools=True,
129
  max_steps=20,
130
- additional_authorized_imports=["math", "pandas", "json", "numpy", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'sklearn', 'scipy', 'datetime', 'typing'],
131
  verbosity_level=logging.INFO,
132
  planning_interval=4,
133
  prompt_templates=PROMPT_TEMPLATE["master_agent"],
 
2
  from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
3
  from tools import tavily_search_tool, visit_webpage_tool
4
  from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
5
+ from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video, get_image_from_file
6
  from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
7
  from community_tools import community_tools, get_youtube_transcript_from_url
8
  import os
 
53
 
54
  vlm_agent = CodeAgent(
55
  model=vlm_model,
56
+ tools=[download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video, get_image_from_file],
57
  max_steps=6,
58
  # prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
59
+ additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
60
  name="vlm_agent",
61
  description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
62
  )
 
127
  tools=[sort_list, get_youtube_transcript_from_url, *community_tools, tavily_search_tool, visit_webpage_tool],
128
  add_base_tools=True,
129
  max_steps=20,
130
+ additional_authorized_imports=["math", "pandas", "json", "numpy", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'sklearn', 'scipy', 'datetime', 'typing', 'markdownify', 'requests', 'json'],
131
  verbosity_level=logging.INFO,
132
  planning_interval=4,
133
  prompt_templates=PROMPT_TEMPLATE["master_agent"],
requirements.txt CHANGED
@@ -65,6 +65,7 @@ langchain-text-splitters==0.3.8
65
  langsmith==0.3.42
66
  lxml==5.4.0
67
  markdown-it-py==3.0.0
 
68
  MarkupSafe==3.0.2
69
  marshmallow==3.26.1
70
  matplotlib==3.10.3
 
65
  langsmith==0.3.42
66
  lxml==5.4.0
67
  markdown-it-py==3.0.0
68
+ markdownify==1.1.0
69
  MarkupSafe==3.0.2
70
  marshmallow==3.26.1
71
  matplotlib==3.10.3
utils.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import json
import os
from typing import Optional

from smolagents.tools import tool
5
+
6
def find_file_by_task_id(task_id: str, metadata_path: str = "Final_Assignment_Template/validation/metadata.jsonl") -> Optional[str]:
    """
    Look up the attachment filename recorded for *task_id* in a metadata.jsonl file.

    Args:
        task_id (str): The task_id to search for.
        metadata_path (str): Path to the metadata.jsonl file. Defaults to the
            validation directory path.

    Returns:
        Optional[str]: The filename if found, None if not found or if the
        task_id has no associated file.

    Raises:
        FileNotFoundError: If *metadata_path* does not exist.

    Example:
        >>> find_file_by_task_id("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    if not os.path.exists(metadata_path):
        raise FileNotFoundError(f"Metadata file not found at {metadata_path}")

    with open(metadata_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            try:
                record = json.loads(raw_line.strip())
            except json.JSONDecodeError:
                # Tolerate malformed lines; later lines may still be valid.
                continue
            if record.get('task_id') != task_id:
                continue
            # An empty "file_name" means the task has no attachment.
            return record.get('file_name') or None

    return None
35
+
36
def get_full_file_path(task_id: str, base_dir: str = "Final_Assignment_Template/validation") -> Optional[str]:
    """
    Get the full file path for a given task_id if it exists.

    The metadata file is looked up at ``<base_dir>/metadata.jsonl`` so a
    non-default *base_dir* is honoured consistently (previously the metadata
    path was hard-coded and silently ignored *base_dir*; the default behavior
    is unchanged since the defaults coincide).

    Args:
        task_id (str): The task_id to search for.
        base_dir (str): Base directory where files and metadata.jsonl are
            stored. Defaults to the validation directory.

    Returns:
        Optional[str]: Full path to the file if found, None if not found.

    Example:
        >>> get_full_file_path("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "Final_Assignment_Template/validation/32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    metadata_path = os.path.join(base_dir, "metadata.jsonl")
    filename = find_file_by_task_id(task_id, metadata_path)
    if not filename:
        return None

    full_path = os.path.join(base_dir, filename)
    # Only report paths that actually exist on disk.
    return full_path if os.path.exists(full_path) else None
57
+
58
@tool
def load_file_from_task_id(task_id: str) -> str:
    """
    Load a file related to a given task_id if it exists.

    Attachments may be binary (.xlsx, .mp3, .png are LFS-tracked in this
    repo), so the file is read as bytes: text-mode reading crashed with
    UnicodeDecodeError on such files. UTF-8 content is returned as text;
    anything else is returned base64-encoded so the return type stays str.

    Args:
        task_id: The task_id to load the file for
    Returns:
        The file content (text, or base64 for binary files), or the string
        "File not found" if no file exists for this task.
    """
    file_path = get_full_file_path(task_id)
    if not file_path:
        return "File not found"
    with open(file_path, 'rb') as file:
        raw = file.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # Binary attachment: keep the str contract via base64.
        return base64.b64encode(raw).decode('ascii')
vlm_tools.py CHANGED
@@ -4,6 +4,7 @@ import pytesseract
4
  import requests
5
  import base64
6
  import onnxruntime
 
7
  from io import BytesIO
8
  from PIL import Image
9
  from langchain_core.tools import tool as langchain_tool
@@ -103,6 +104,26 @@ def download_image(image_url: str)->str:
103
  image = base64.b64encode(response.content).decode('utf-8')
104
  return image
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  @tool
107
  def image_processing(image: str, brightness: float = 1.0, contrast: float = 1.0)->str:
108
  """
 
4
  import requests
5
  import base64
6
  import onnxruntime
7
+ import os
8
  from io import BytesIO
9
  from PIL import Image
10
  from langchain_core.tools import tool as langchain_tool
 
104
  image = base64.b64encode(response.content).decode('utf-8')
105
  return image
106
 
107
@tool
def get_image_from_file(file_path: str) -> str:
    """
    Get an image from a file.

    If the path cannot be opened as given (e.g. it is relative to the
    repository root rather than to this module), retry with the path
    re-anchored next to this file.

    Args:
        file_path: The path to the file
    Returns:
        The image as a base64 string
    """
    try:
        with open(file_path, 'rb') as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except OSError:
        current_file_dir = os.path.dirname(os.path.abspath(__file__))
        # Strip the repo prefix AND any leading separator it leaves behind:
        # os.path.join discards current_file_dir entirely when its second
        # argument is absolute (e.g. "/validation/x.png"), which made the
        # original fallback point at the filesystem root.
        relative = file_path.replace("Final_Assignment_Template", "").lstrip("/\\")
        fallback_path = os.path.join(current_file_dir, relative)
        with open(fallback_path, 'rb') as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
126
+
127
  @tool
128
  def image_processing(image: str, brightness: float = 1.0, contrast: float = 1.0)->str:
129
  """