Spaces:
Sleeping
Sleeping
Add frame extraction
Browse files- mini_agents.py +2 -2
- vlm_tools.py +19 -0
mini_agents.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from smolagents import CodeAgent, InferenceClientModel
|
2 |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
|
3 |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
4 |
-
from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan
|
5 |
from audio_tools import audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
6 |
from community_tools import community_tools
|
7 |
import os
|
@@ -40,7 +40,7 @@ vlm_model = InferenceClientModel(
|
|
40 |
|
41 |
vlm_agent = CodeAgent(
|
42 |
model=vlm_model,
|
43 |
-
tools=[download_image, image_processing, object_detection_tool, ocr_scan],
|
44 |
max_steps=4,
|
45 |
name="vlm_agent",
|
46 |
description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
|
|
|
1 |
from smolagents import CodeAgent, InferenceClientModel
|
2 |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
|
3 |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
4 |
+
from vlm_tools import download_image, image_processing, object_detection_tool, ocr_scan, extract_frames_from_video
|
5 |
from audio_tools import audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
6 |
from community_tools import community_tools
|
7 |
import os
|
|
|
40 |
|
41 |
vlm_agent = CodeAgent(
|
42 |
model=vlm_model,
|
43 |
+
tools=[download_image, image_processing, object_detection_tool, ocr_scan, extract_frames_from_video],
|
44 |
max_steps=4,
|
45 |
name="vlm_agent",
|
46 |
description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
|
vlm_tools.py
CHANGED
@@ -72,6 +72,25 @@ def post_processing(onnx_output, classes, original_shape, conf_threshold=0.5, nm
|
|
72 |
|
73 |
return detected_objects
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
@tool
|
76 |
def download_image(image_url: str)->str:
|
77 |
"""
|
|
|
72 |
|
73 |
return detected_objects
|
74 |
|
75 |
+
@tool
def extract_frames_from_video(video_path: str) -> list:
    """
    Extract every frame from a video, in the order the capture yields them.

    Args:
        video_path: The path to the video file

    Returns:
        A list of frames as numpy arrays. The list is empty when the video could not be opened or no frame could be read.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # isOpened() is False when the capture could not be opened, so the
        # loop body never runs and an empty list is returned.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reports failure once no further frame is available.
                break
            # NOTE: every frame is kept in memory; long videos can be large.
            frames.append(frame)
    finally:
        # Release the capture handle even if reading or appending raises.
        cap.release()
    return frames
|
93 |
+
|
94 |
@tool
|
95 |
def download_image(image_url: str)->str:
|
96 |
"""
|