huytofu92 committed on
Commit
b28ae74
·
1 Parent(s): a2cf089

Add frame extraction

Browse files
Files changed (2) hide show
  1. mini_agents.py +2 -2
  2. vlm_tools.py +19 -0
mini_agents.py CHANGED
@@ -1,7 +1,7 @@
1
  from smolagents import CodeAgent, InferenceClientModel
2
  from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
3
  from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
4
- from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan
5
  from audio_tools import audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
6
  from community_tools import community_tools
7
  import os
@@ -40,7 +40,7 @@ vlm_model = InferenceClientModel(
40
 
41
  vlm_agent = CodeAgent(
42
  model=vlm_model,
43
- tools=[download_image, image_processing, object_detection_tool, ocr_scan],
44
  max_steps=4,
45
  name="vlm_agent",
46
  description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
 
1
  from smolagents import CodeAgent, InferenceClientModel
2
  from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
3
  from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
4
+ from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan, extract_frames_from_video
5
  from audio_tools import audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
6
  from community_tools import community_tools
7
  import os
 
40
 
41
  vlm_agent = CodeAgent(
42
  model=vlm_model,
43
+ tools=[download_image, image_processing, object_detection_tool, ocr_scan, extract_frames_from_video],
44
  max_steps=4,
45
  name="vlm_agent",
46
  description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
vlm_tools.py CHANGED
@@ -72,6 +72,25 @@ def post_processing(onnx_output, classes, original_shape, conf_threshold=0.5, nm
72
 
73
  return detected_objects
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  @tool
76
  def download_image(image_url: str)->str:
77
  """
 
72
 
73
  return detected_objects
74
 
75
@tool
def extract_frames_from_video(video_path: str) -> list:
    """
    Extract every frame from a video, in the order they are read.

    All frames are collected in memory before returning, so memory use grows
    with the length of the video. If the capture does not report itself as
    opened, or no frame can be read, an empty list is returned.

    Args:
        video_path: The path to the video file
    Returns:
        A list of frames as numpy arrays
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reported no frame: stop collecting.
                break
            frames.append(frame)
    finally:
        # Release the capture even if reading or appending raises, so the
        # underlying video handle is never leaked.
        cap.release()
    return frames
93
+
94
  @tool
95
  def download_image(image_url: str)->str:
96
  """