Final_Assignment_Template

Sleeping

App Files Community

altozachmo committed on Apr 28

Commit

9ff7774

1 Parent(s): 50df397

remove logging change

Browse files

Files changed (7) hide show

agents/agent.py +4 -4
app.py +6 -2
run_local_agent.py +1 -0
test.py +1 -0
tools/text_search.py +2 -1
tools/text_splitter.py +3 -2
tools/video_analyzer.py +60 -51

agents/agent.py CHANGED Viewed

@@ -8,6 +8,7 @@ from tools.text_search import TextSearch
 from tools.text_splitter import text_splitter
 from tools.video_analyzer import YouTubeObjectCounterTool
 class MyAgent:
     def __init__(
         self,
@@ -45,12 +46,11 @@ class MyAgent:
             DuckDuckGoSearchTool(),  # Search tool for web queries
             WikipediaSearchTool(),  # Search tool for Wikipedia queries
             TextSearch(),  # Search tool for text queries
-            text_splitter, # Text splitter tool for breaking down large texts
-                           # into manageable lists.
-            YouTubeObjectCounterTool(), # Tool for analyzing YouTube videos
         ]
         # Initialize the agent with the specified provider and model ID
         if provider == "litellm":
             self.agent = CodeAgent(

 from tools.text_splitter import text_splitter
 from tools.video_analyzer import YouTubeObjectCounterTool
 class MyAgent:
     def __init__(
         self,
             DuckDuckGoSearchTool(),  # Search tool for web queries
             WikipediaSearchTool(),  # Search tool for Wikipedia queries
             TextSearch(),  # Search tool for text queries
+            text_splitter,  # Text splitter tool for breaking down large texts
+            # into manageable lists.
+            YouTubeObjectCounterTool(),  # Tool for analyzing YouTube videos
         ]
         # Initialize the agent with the specified provider and model ID
         if provider == "litellm":
             self.agent = CodeAgent(

app.py CHANGED Viewed

@@ -70,7 +70,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    for item in tqdm(questions_data[0:3], desc="Agent is answering questions...", total=len(questions_data)):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
@@ -78,7 +82,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             continue
         try:
             submitted_answer = agent(question_text)
-            time.sleep(30) # to avoid rate limiting
             answers_payload.append(
                 {"task_id": task_id, "submitted_answer": submitted_answer}
             )

     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    for item in tqdm(
+        questions_data[0:3],
+        desc="Agent is answering questions...",
+        total=len(questions_data),
+    ):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             continue
         try:
             submitted_answer = agent(question_text)
+            time.sleep(30)  # to avoid rate limiting
             answers_payload.append(
                 {"task_id": task_id, "submitted_answer": submitted_answer}
             )

run_local_agent.py CHANGED Viewed

@@ -4,6 +4,7 @@ from utils import run_agent
 import os
 import json
 from dotenv import load_dotenv
 load_dotenv()
 QUESTIONS_FILEPATH: str = os.getenv("QUESTIONS_FILEPATH", default="metadata.jsonl")

 import os
 import json
 from dotenv import load_dotenv
 load_dotenv()
 QUESTIONS_FILEPATH: str = os.getenv("QUESTIONS_FILEPATH", default="metadata.jsonl")

test.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from smolagents import LiteLLMModel, OpenAIServerModel
 from dotenv import load_dotenv
 load_dotenv()
 model_id = "ollama_chat/mistral-small3.1:latest"

 from smolagents import LiteLLMModel, OpenAIServerModel
 from dotenv import load_dotenv
 load_dotenv()
 model_id = "ollama_chat/mistral-small3.1:latest"

tools/text_search.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from smolagents import Tool
 class TextSearch(Tool):
     name: str = "text_search_tool"
     description: str = "This tool searches through a string for substrings and returns the indices of all occurances of that substring."
@@ -11,7 +12,7 @@ class TextSearch(Tool):
         "search_text": {
             "type": "string",
             "description": "The text to search for within source_text.",
-        }
     }
     output_type: str = "array"

 from smolagents import Tool
 class TextSearch(Tool):
     name: str = "text_search_tool"
     description: str = "This tool searches through a string for substrings and returns the indices of all occurances of that substring."
         "search_text": {
             "type": "string",
             "description": "The text to search for within source_text.",
+        },
     }
     output_type: str = "array"

tools/text_splitter.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from smolagents import tool
 @tool
 def text_splitter(text: str, separator: str = "\n") -> list[str]:
     """
-    Splits the input text string into a list on `separator` which
-    defaults to the newline character. This is useful for when
     you need to browse through a large text file that may contain
     a list your are interested in.

 from smolagents import tool
 @tool
 def text_splitter(text: str, separator: str = "\n") -> list[str]:
     """
+    Splits the input text string into a list on `separator` which
+    defaults to the newline character. This is useful for when
     you need to browse through a large text file that may contain
     a list your are interested in.

tools/video_analyzer.py CHANGED Viewed

@@ -6,8 +6,7 @@ from yt_dlp import YoutubeDL
 from transformers import pipeline
 from typing import Any
 from PIL import Image
-import numpy as np
-from transformers import logging
 class YouTubeObjectCounterTool(Tool):
     name = "youtube_object_counter"
@@ -15,12 +14,12 @@ class YouTubeObjectCounterTool(Tool):
     inputs = {
         "url": {
             "type": "string",
-            "description": "The URL of the YouTube video to analyze."
         },
         "label": {
             "type": "string",
-            "description": "The type of object to count (e.g., 'bird', 'person', 'car', 'dog'). Use common object names recognized by standard object detection models."
-        }
     }
     output_type = "string"
@@ -28,16 +27,16 @@ class YouTubeObjectCounterTool(Tool):
         """Downloads the YouTube video to a temporary file."""
         print(f"Downloading video from {url}...")
         temp_dir = tempfile.mkdtemp()
         video_path = os.path.join(temp_dir, "video.mp4")
         ydl_opts = {
-            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
-            'outtmpl': video_path,
-            'quiet': True,
-            'no_warnings': True
         }
         try:
             with YoutubeDL(ydl_opts) as ydl:
                 ydl.download([url])
@@ -50,22 +49,24 @@ class YouTubeObjectCounterTool(Tool):
     def _count_objects_in_frame(self, frame, label: str):
         """Counts objects of specified label in a single frame using the object detection model."""
         try:
             # Convert OpenCV BGR frame to RGB
             rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             # Convert numpy array to PIL Image
             pil_image = Image.fromarray(rgb_frame)
             # Load the detector
             detector = pipeline("object-detection", model="facebook/detr-resnet-50")
             # Run detection with PIL Image
             results = detector(pil_image)
             # Count objects matching the label
-            object_count = sum(1 for result in results if label.lower() in result['label'].lower())
             return object_count
         except Exception as e:
             print(f"Error detecting objects in frame: {str(e)}")
@@ -74,65 +75,73 @@ class YouTubeObjectCounterTool(Tool):
     def _analyze_video(self, video_path: str, label: str) -> dict[str, Any]:
         """Analyzes the video frame by frame and counts objects of the specified label."""
         sample_rate = 30
-        print(f"Analyzing video {video_path}, looking for '{label}' objects, sampling every {sample_rate} frames...")
         # Open the video file
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
             raise RuntimeError(f"Error: Could not open video file {video_path}")
         # Get video properties
         fps = cap.get(cv2.CAP_PROP_FPS)
         frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         duration = frame_count / fps
         # Initialize results
         frame_results = []
         total_objects = 0
         max_objects = 0
         max_objects_frame = 0
         frame_idx = 0
         # Process frames
         while cap.isOpened():
             ret, frame = cap.read()
             if not ret:
                 break
             # Only process every nth frame
             if frame_idx % sample_rate == 0:
                 time_point = frame_idx / fps
                 print(f"Processing frame {frame_idx} at time {time_point:.2f}s...")
                 object_count = self._count_objects_in_frame(frame, label)
                 total_objects += object_count
                 if object_count > max_objects:
                     max_objects = object_count
                     max_objects_frame = frame_idx
-                frame_results.append({
-                    "frame": frame_idx,
-                    "time": time_point,
-                    "object_count": object_count
-                })
             frame_idx += 1
         # Release resources
         cap.release()
         # Calculate statistics
-        avg_objects_per_frame = total_objects / len(frame_results) if frame_results else 0
         max_objects_time = max_objects_frame / fps if max_objects_frame else 0
         # Clean up the temporary file
         try:
             os.remove(video_path)
             print(f"Deleted temporary video file: {video_path}")
         except Exception as e:
-            print(f"Warning: Failed to delete temporary video file: {video_path} | {str(e)}")
         return {
             "frame_results": frame_results,
             "total_frames_analyzed": len(frame_results),
@@ -143,48 +152,48 @@ class YouTubeObjectCounterTool(Tool):
             "max_objects_in_single_frame": max_objects,
             "max_objects_frame": max_objects_frame,
             "max_objects_time": max_objects_time,
-            "label": label
         }
     def forward(self, url: str, label: str) -> str:
         """
         Analyzes a YouTube video frame by frame and counts objects of the specified type.
         Args:
             url (str): The URL of the YouTube video to analyze.
             label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
         Returns:
             str: A detailed report of object counts per frame and summary statistics.
         """
-        logging.set_verbosity_error()
         try:
             # Download the video
             video_path = self._download_video(url)
             # Analyze the video
             results = self._analyze_video(video_path, label)
             # Generate a report
             report = [
                 f"# {label.title()} Count Analysis for YouTube Video",
                 f"Video URL: {url}",
                 f"Video duration: {results['video_duration']:.2f} seconds",
                 f"Analyzed {results['total_frames_analyzed']} frames out of {results['total_frames']} total frames",
-                f"Sampling rate: 1 frame every 30 frames (approximately {results['fps']/30:.2f} frames per second)",
                 "## Summary",
                 f"Average {label}s per analyzed frame: {results['average_objects_per_analyzed_frame']:.2f}",
                 f"Maximum {label}s in a single frame: {results['max_objects_in_single_frame']} (at {results['max_objects_time']:.2f} seconds)",
             ]
             # Add frame-by-frame details
             report.append("## Frame-by-Frame Analysis")
             for result in results["frame_results"]:
-                report.append(f"Frame {result['frame']} (Time: {result['time']:.2f}s): {result['object_count']} {label}s")
             return "\n".join(report)
         except Exception as e:
             return f"Error analyzing video: {str(e)}"

 from transformers import pipeline
 from typing import Any
 from PIL import Image
 class YouTubeObjectCounterTool(Tool):
     name = "youtube_object_counter"
     inputs = {
         "url": {
             "type": "string",
+            "description": "The URL of the YouTube video to analyze.",
         },
         "label": {
             "type": "string",
+            "description": "The type of object to count (e.g., 'bird', 'person', 'car', 'dog'). Use common object names recognized by standard object detection models.",
+        },
     }
     output_type = "string"
         """Downloads the YouTube video to a temporary file."""
         print(f"Downloading video from {url}...")
         temp_dir = tempfile.mkdtemp()
         video_path = os.path.join(temp_dir, "video.mp4")
         ydl_opts = {
+            "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
+            "outtmpl": video_path,
+            "quiet": True,
+            "no_warnings": True,
         }
         try:
             with YoutubeDL(ydl_opts) as ydl:
                 ydl.download([url])
     def _count_objects_in_frame(self, frame, label: str):
         """Counts objects of specified label in a single frame using the object detection model."""
         try:
             # Convert OpenCV BGR frame to RGB
             rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             # Convert numpy array to PIL Image
             pil_image = Image.fromarray(rgb_frame)
             # Load the detector
             detector = pipeline("object-detection", model="facebook/detr-resnet-50")
             # Run detection with PIL Image
             results = detector(pil_image)
             # Count objects matching the label
+            object_count = sum(
+                1 for result in results if label.lower() in result["label"].lower()
+            )
             return object_count
         except Exception as e:
             print(f"Error detecting objects in frame: {str(e)}")
     def _analyze_video(self, video_path: str, label: str) -> dict[str, Any]:
         """Analyzes the video frame by frame and counts objects of the specified label."""
         sample_rate = 30
+        print(
+            f"Analyzing video {video_path}, looking for '{label}' objects, sampling every {sample_rate} frames..."
+        )
         # Open the video file
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
             raise RuntimeError(f"Error: Could not open video file {video_path}")
         # Get video properties
         fps = cap.get(cv2.CAP_PROP_FPS)
         frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         duration = frame_count / fps
         # Initialize results
         frame_results = []
         total_objects = 0
         max_objects = 0
         max_objects_frame = 0
         frame_idx = 0
         # Process frames
         while cap.isOpened():
             ret, frame = cap.read()
             if not ret:
                 break
             # Only process every nth frame
             if frame_idx % sample_rate == 0:
                 time_point = frame_idx / fps
                 print(f"Processing frame {frame_idx} at time {time_point:.2f}s...")
                 object_count = self._count_objects_in_frame(frame, label)
                 total_objects += object_count
                 if object_count > max_objects:
                     max_objects = object_count
                     max_objects_frame = frame_idx
+                frame_results.append(
+                    {
+                        "frame": frame_idx,
+                        "time": time_point,
+                        "object_count": object_count,
+                    }
+                )
             frame_idx += 1
         # Release resources
         cap.release()
         # Calculate statistics
+        avg_objects_per_frame = (
+            total_objects / len(frame_results) if frame_results else 0
+        )
         max_objects_time = max_objects_frame / fps if max_objects_frame else 0
         # Clean up the temporary file
         try:
             os.remove(video_path)
             print(f"Deleted temporary video file: {video_path}")
         except Exception as e:
+            print(
+                f"Warning: Failed to delete temporary video file: {video_path} | {str(e)}"
+            )
         return {
             "frame_results": frame_results,
             "total_frames_analyzed": len(frame_results),
             "max_objects_in_single_frame": max_objects,
             "max_objects_frame": max_objects_frame,
             "max_objects_time": max_objects_time,
+            "label": label,
         }
     def forward(self, url: str, label: str) -> str:
         """
         Analyzes a YouTube video frame by frame and counts objects of the specified type.
         Args:
             url (str): The URL of the YouTube video to analyze.
             label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
         Returns:
             str: A detailed report of object counts per frame and summary statistics.
         """
         try:
             # Download the video
             video_path = self._download_video(url)
             # Analyze the video
             results = self._analyze_video(video_path, label)
             # Generate a report
             report = [
                 f"# {label.title()} Count Analysis for YouTube Video",
                 f"Video URL: {url}",
                 f"Video duration: {results['video_duration']:.2f} seconds",
                 f"Analyzed {results['total_frames_analyzed']} frames out of {results['total_frames']} total frames",
+                f"Sampling rate: 1 frame every 30 frames (approximately {results['fps'] / 30:.2f} frames per second)",
                 "## Summary",
                 f"Average {label}s per analyzed frame: {results['average_objects_per_analyzed_frame']:.2f}",
                 f"Maximum {label}s in a single frame: {results['max_objects_in_single_frame']} (at {results['max_objects_time']:.2f} seconds)",
             ]
             # Add frame-by-frame details
             report.append("## Frame-by-Frame Analysis")
             for result in results["frame_results"]:
+                report.append(
+                    f"Frame {result['frame']} (Time: {result['time']:.2f}s): {result['object_count']} {label}s"
+                )
             return "\n".join(report)
         except Exception as e:
             return f"Error analyzing video: {str(e)}"