Spaces:
Sleeping
Sleeping
Tool desc again
Browse files
- vlm_tools.py +14 -4
vlm_tools.py
CHANGED
@@ -130,14 +130,19 @@ onnx_path = "vlm_assets/yolov3-8.onnx"
|
|
130 |
names_path = "vlm_assets/obj.names"
|
131 |
|
132 |
class ObjectDetectionTool(Tool):
|
133 |
-
description = "
|
|
|
|
|
|
|
|
|
|
|
134 |
name = "object_detection"
|
135 |
inputs = {
|
136 |
"frames": {"type": "any", "description": "The list of frames (images) to detect objects in. Must be a List[str] or a List[np.ndarray]"},
|
137 |
"onnx_path": {"type": "string", "description": "The path to the onnx file"},
|
138 |
"names_path": {"type": "string", "description": "The path to the names file"}
|
139 |
}
|
140 |
-
output_type = "
|
141 |
|
142 |
def setup(self):
|
143 |
# Load ONNX model
|
@@ -164,12 +169,17 @@ class ObjectDetectionTool(Tool):
|
|
164 |
return detected_objects
|
165 |
|
166 |
class OCRTool(Tool):
|
167 |
-
description = "
|
|
|
|
|
|
|
|
|
|
|
168 |
name = "ocr_scan"
|
169 |
inputs = {
|
170 |
"frames": {"type": "any", "description": "The list of frames (images) to scan for text. Must be a List[str] or a List[np.ndarray]"}
|
171 |
}
|
172 |
-
output_type = "
|
173 |
|
174 |
def forward(self, frames: any)->any:
|
175 |
scanned_text = []
|
|
|
130 |
names_path = "vlm_assets/obj.names"
|
131 |
|
132 |
class ObjectDetectionTool(Tool):
|
133 |
+
description = """
|
134 |
+
Detect objects in a list of frames (images).
|
135 |
+
It takes a list of frames (images) as input and returns
|
136 |
+
a list of detected objects with labels, confidence, and bounding boxes.
|
137 |
+
The output type will be List[List[str]]
|
138 |
+
"""
|
139 |
name = "object_detection"
|
140 |
inputs = {
|
141 |
"frames": {"type": "any", "description": "The list of frames (images) to detect objects in. Must be a List[str] or a List[np.ndarray]"},
|
142 |
"onnx_path": {"type": "string", "description": "The path to the onnx file"},
|
143 |
"names_path": {"type": "string", "description": "The path to the names file"}
|
144 |
}
|
145 |
+
output_type = "any"
|
146 |
|
147 |
def setup(self):
|
148 |
# Load ONNX model
|
|
|
169 |
return detected_objects
|
170 |
|
171 |
class OCRTool(Tool):
|
172 |
+
description = """
|
173 |
+
Scan an image for text.
|
174 |
+
It takes a list of frames (images) as input and returns
|
175 |
+
a list of text in the images.
|
176 |
+
The output type will be List[List[str]]
|
177 |
+
"""
|
178 |
name = "ocr_scan"
|
179 |
inputs = {
|
180 |
"frames": {"type": "any", "description": "The list of frames (images) to scan for text. Must be a List[str] or a List[np.ndarray]"}
|
181 |
}
|
182 |
+
output_type = "any"
|
183 |
|
184 |
def forward(self, frames: any)->any:
|
185 |
scanned_text = []
|