Final_Assignment_Template

Sleeping

huytofu92 committed on May 16

Commit

b28ae74

1 Parent(s): a2cf089

Add frame extraction

Files changed (2) hide show

mini_agents.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from smolagents import CodeAgent, InferenceClientModel
 from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
 from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
-from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan
 from audio_tools import audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
 from community_tools import community_tools
 import os
@@ -40,7 +40,7 @@ vlm_model = InferenceClientModel(
 vlm_agent = CodeAgent(
     model=vlm_model,
-    tools=[download_image, image_processing, object_detection_tool, ocr_scan],
     max_steps=4,
     name="vlm_agent",
     description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."

 from smolagents import CodeAgent, InferenceClientModel
 from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
 from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
+from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan, extract_frames_from_video
 from audio_tools import audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
 from community_tools import community_tools
 import os
 vlm_agent = CodeAgent(
     model=vlm_model,
+    tools=[download_image, image_processing, object_detection_tool, ocr_scan, extract_frames_from_video],
     max_steps=4,
     name="vlm_agent",
     description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."

vlm_tools.py CHANGED Viewed

@@ -72,6 +72,25 @@ def post_processing(onnx_output, classes, original_shape, conf_threshold=0.5, nm
     return detected_objects
 @tool
 def download_image(image_url: str)->str:
     """

     return detected_objects
+@tool
+def extract_frames_from_video(video_path: str) -> list:
+    """
+    Extract frames from a video
+    Args:
+        video_path: The path to the video file
+    Returns:
+        A list of frames as numpy arrays
+    """
+    cap = cv2.VideoCapture(video_path)
+    frames = []
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frames.append(frame)
+    cap.release()
+    return frames
 @tool
 def download_image(image_url: str)->str:
     """