Spaces: Running on Zero
Update
app.py CHANGED
@@ -53,7 +53,7 @@ IMAGE_EXAMPLES = [
 ]
 
 # Video
-MAX_NUM_FRAMES =
+MAX_NUM_FRAMES = 250
 BATCH_SIZE = 4
 ALLOWED_VIDEO_EXTENSIONS = {".mp4", ".avi", ".mov"}
 VIDEO_OUTPUT_DIR = Path("static/videos")
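For context, a minimal sketch of how these constants are typically consumed when a video is uploaded; `validate_video` is a hypothetical helper, not part of the Space's code:

```python
# Hypothetical helper showing typical use of the constants above.
from pathlib import Path

ALLOWED_VIDEO_EXTENSIONS = {".mp4", ".avi", ".mov"}
VIDEO_OUTPUT_DIR = Path("static/videos")

def validate_video(video_path: str) -> Path:
    path = Path(video_path)
    if path.suffix.lower() not in ALLOWED_VIDEO_EXTENSIONS:
        raise ValueError(f"Unsupported video extension: {path.suffix}")
    VIDEO_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)  # ensure output dir exists
    return path
```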
@@ -70,18 +70,28 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 
+@lru_cache(maxsize=3)
+def get_model_and_processor(checkpoint: str):
+    model = AutoModelForObjectDetection.from_pretrained(checkpoint, torch_dtype=TORCH_DTYPE)
+    image_processor = AutoImageProcessor.from_pretrained(checkpoint)
+    return model, image_processor
+
+
 @spaces.GPU(duration=20)
 def detect_objects(
     checkpoint: str,
-    images: List[np.ndarray],
+    images: List[np.ndarray] | np.ndarray,
     confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
     target_size: Optional[Tuple[int, int]] = None,
     batch_size: int = BATCH_SIZE,
 ):
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    model =
-
+    model, image_processor = get_model_and_processor(checkpoint)
+    model = model.to(device)
+
+    if isinstance(images, np.ndarray) and images.ndim == 4:
+        images = [x for x in images]  # split video array into list of images
 
     batches = [images[i:i + batch_size] for i in range(0, len(images), batch_size)]
 
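The new `get_model_and_processor` pairs `functools.lru_cache` with ZeroGPU: weights are loaded and memoized once per checkpoint, and only moved to CUDA inside the `@spaces.GPU`-scoped call, so switching between a handful of checkpoints never reloads one that is already resident. A self-contained toy demo of the memoization behavior (no transformers needed):

```python
# Toy demo of the lru_cache pattern above: the loader body runs once per
# distinct checkpoint; repeat calls are cache hits.
from functools import lru_cache

@lru_cache(maxsize=3)  # keep at most 3 checkpoints resident
def load(checkpoint: str) -> dict:
    print(f"loading {checkpoint}")  # executes only on a cache miss
    return {"name": checkpoint}

load("checkpoint-a")  # prints "loading checkpoint-a"
load("checkpoint-a")  # cache hit: same object back, no print
load("checkpoint-b")  # different key: loads again
```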
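The hunk stops at the batch split, so the inference loop itself is not shown. A hedged sketch of how such batches are commonly run through a transformers detection model; the loop body is an assumption, not the Space's code, but `image_processor(...)` and `post_process_object_detection` are the stock transformers APIs:

```python
# Hedged sketch of a batched inference loop over `batches`; the Space's
# real loop body falls outside this hunk.
results = []
for batch in batches:
    inputs = image_processor(images=batch, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # map raw logits/boxes back to per-image detections at the threshold;
    # target_sizes are (height, width) pairs
    sizes = [img.shape[:2] for img in batch] if target_size is None else [target_size] * len(batch)
    results.extend(
        image_processor.post_process_object_detection(
            outputs, threshold=confidence_threshold, target_sizes=sizes
        )
    )
```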
@@ -205,12 +215,13 @@ def process_video(
 
     n_frames_to_read = min(MAX_NUM_FRAMES, video_info.total_frames // read_each_i_frame)
     frames = read_video_k_frames(video_path, n_frames_to_read, read_each_i_frame)
+    frames = [cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_CUBIC) for frame in frames]
 
     box_annotator = sv.BoxAnnotator(thickness=1)
     label_annotator = sv.LabelAnnotator(text_scale=0.5)
 
     results, id2label = detect_objects(
-        images=frames,
+        images=np.array(frames),
         checkpoint=checkpoint,
         confidence_threshold=confidence_threshold,
         target_size=(target_height, target_width),
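Note the two size conventions in play here: `cv2.resize` takes `dsize` as `(width, height)`, while `target_size` uses numpy's `(height, width)` order, which is why the same pair appears flipped a few lines apart. A quick self-contained check:

```python
# cv2.resize's dsize is (width, height), the reverse of numpy's
# (height, width) shape order used by target_size above.
import cv2
import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # shape = (height, width, channels)
small = cv2.resize(frame, (320, 240), interpolation=cv2.INTER_CUBIC)  # dsize = (w, h)
assert small.shape[:2] == (240, 320)  # (height, width) again
```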
@@ -218,7 +229,6 @@ def process_video(
 
     annotated_frames = []
     for frame, result in tqdm.tqdm(zip(frames, results), desc="Annotating frames", total=len(frames)):
-        frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
         detections = sv.Detections.from_transformers(result, id2label=id2label)
         detections = detections.with_nms(threshold=0.95, class_agnostic=True)
         annotated_frame = box_annotator.annotate(scene=frame, detections=detections)
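The in-loop resize is dropped because frames are now resized once before inference (previous hunk), so detections and frames already share one coordinate space. The line between this hunk and the next, where `label_annotator` is presumably applied, is not shown in the diff; a hedged sketch of the usual supervision labeling step, using the public `LabelAnnotator.annotate` API:

```python
# Hedged sketch of the labeling step that likely sits between these hunks;
# the Space's exact line is not visible in the diff.
labels = [
    f"{id2label[class_id]} {confidence:.2f}"
    for class_id, confidence in zip(detections.class_id, detections.confidence)
]
annotated_frame = label_annotator.annotate(
    scene=annotated_frame, detections=detections, labels=labels
)
```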
@@ -226,7 +236,7 @@ def process_video(
         annotated_frames.append(annotated_frame)
 
     output_filename = os.path.join(VIDEO_OUTPUT_DIR, f"output_{uuid.uuid4()}.mp4")
-    iio.imwrite(output_filename, annotated_frames, fps=target_fps, codec="h264")
+    iio.imwrite(output_filename, annotated_frames, fps=target_fps, codec="h264")
     return output_filename
 
 
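`iio.imwrite` (imageio's v3 API) encodes the list of HxWx3 uint8 frames straight to an H.264 mp4, mirroring the call above; this assumes an ffmpeg/pyav backend is installed. Note that imageio expects RGB frames, so anything coming from OpenCV's BGR convention needs a `cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)` first or colors come out swapped. A minimal self-contained sketch:

```python
# Minimal write-path sketch, assuming imageio with an ffmpeg/pyav backend.
import imageio.v3 as iio
import numpy as np

frames = [np.random.randint(0, 256, (240, 320, 3), dtype=np.uint8) for _ in range(30)]
iio.imwrite("out.mp4", frames, fps=30, codec="h264")  # 30 frames at 30 fps = 1 s
```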