yonigozlan HF staff committed on
Commit
ca069cd
·
0 Parent(s):

initial commit

Browse files
Files changed (8) hide show
  1. .gitattributes +38 -0
  2. README copy.md +15 -0
  3. README.md +13 -0
  4. app.py +178 -0
  5. cat.mp4 +3 -0
  6. football.mp4 +3 -0
  7. requirements.txt +7 -0
  8. safari2.mp4 +3 -0
.gitattributes ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ football.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ safari2.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ cat.mp4 filter=lfs diff=lfs merge=lfs -text
README copy.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Omdet Turbo Open Vocabulary
3
+ emoji: 📹
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 4.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ short_description: Video captioning/open-vocabulary/zero-shot
12
+
13
+ ---
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: RTDETR Compiled Speed Test
3
+ emoji: 🐠
4
+ colorFrom: purple
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time

import gradio as gr
import numpy as np
import requests
import spaces
import supervision as sv
import torch
from PIL import Image
from tqdm import tqdm

from transformers import AutoModelForObjectDetection, AutoProcessor

# Prefer GPU when available; everything below must use this, not a literal "cuda".
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

processor = AutoProcessor.from_pretrained("PekingU/rtdetr_r50vd_coco_o365")
model = AutoModelForObjectDetection.from_pretrained(
    "PekingU/rtdetr_r50vd_coco_o365",
    disable_custom_kernels=True,
    torch_dtype=torch.float16,
).to(device)
model_compiled = torch.compile(model, mode="reduce-overhead")

# Warm up the compiled model on a sample image so that the first user request
# does not pay the (slow) torch.compile cost.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
# Bug fix: was hard-coded .to("cuda"), which crashed on CPU-only machines even
# though `device` falls back to CPU. (NOTE(review): float16 inference on CPU
# may still be unsupported by some ops — confirm if CPU support is required.)
inputs = processor(images=image, return_tensors="pt").to(device).to(torch.float16)

print("Compiling model...")
start_time = time.time()
with torch.no_grad():
    for _ in range(10):
        outputs = model_compiled(**inputs)
        # Pull a tensor back to the host to force a device sync, so the
        # elapsed time below reflects the actual compile + run cost.
        _ = outputs[0].cpu()
print(f"Model compiled in {time.time() - start_time:.2f} seconds.")

# Extra CSS for the Gradio app (enlarges feedback text areas).
css = """
.feedback textarea {font-size: 24px !important}
"""

# Shared annotators / tracker used by every processed frame.
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator()
MASK_ANNOTATOR = sv.MaskAnnotator()
LABEL_ANNOTATOR = sv.LabelAnnotator()
TRACKER = sv.ByteTrack()
45
+
46
+
47
def calculate_end_frame_index(source_video_path):
    """Return the last frame index to process for *source_video_path*.

    Processing is capped at five seconds of footage (fps * 5), or the whole
    clip when it is shorter than that.
    """
    info = sv.VideoInfo.from_video_path(source_video_path)
    five_second_cap = info.fps * 5
    return min(info.total_frames, five_second_cap)
50
+
51
+
52
def annotate_image(input_image, detections, labels) -> np.ndarray:
    """Draw masks, bounding boxes, and labels for *detections* onto the image."""
    annotated = MASK_ANNOTATOR.annotate(input_image, detections)
    annotated = BOUNDING_BOX_ANNOTATOR.annotate(annotated, detections)
    return LABEL_ANNOTATOR.annotate(annotated, detections, labels=labels)
57
+
58
+
59
@spaces.GPU
def process_video(
    input_video,
    confidence_threshold,
    max_side,
    progress=gr.Progress(track_tqdm=True),
):
    """Run tracked object detection over (at most) the first 5 s of a video.

    Args:
        input_video: Path to the source video file.
        confidence_threshold: Minimum detection score to keep a box.
        max_side: Longest side frames are resized to before inference.
        progress: Gradio progress bar driven by the tqdm loop below.

    Returns:
        Tuple of (path to the annotated output video, ``gr.Markdown``
        component reporting the average model-only inference FPS).
    """
    video_info = sv.VideoInfo.from_video_path(input_video)
    total = calculate_end_frame_index(input_video)
    frame_generator = sv.get_video_frames_generator(source_path=input_video, end=total)

    result_file_path = os.path.join(os.getcwd(), "output.mp4")
    all_fps = []
    with sv.VideoSink(result_file_path, video_info=video_info) as sink:
        for _ in tqdm(range(total), desc="Processing video.."):
            try:
                frame = next(frame_generator)
            except StopIteration:
                break
            results, fps = query(frame, confidence_threshold, max_side=max_side)
            all_fps.append(fps)

            # (Removed a dead `detections = []` that was immediately overwritten.)
            detections = sv.Detections.from_transformers(results[0])
            detections = TRACKER.update_with_detections(detections)
            final_labels = [
                model.config.id2label[class_id]
                for class_id in detections.class_id.tolist()
            ]
            frame = annotate_image(
                input_image=frame,
                detections=detections,
                labels=final_labels,
            )
            sink.write_frame(frame)

    # Guard against a zero-frame video: np.mean([]) would warn and yield NaN.
    avg_fps = np.mean(all_fps) if all_fps else 0.0
    return result_file_path, gr.Markdown(
        f'<h3 style="text-align: center;">Model inference FPS: {avg_fps:.2f}</h3>',
        visible=True,
    )
100
+
101
+
102
def query(frame, confidence_threshold, max_side=640):
    """Run one detection pass on *frame*.

    Returns a tuple of (post-processed detection results, model-only
    inference FPS measured for this frame).
    """
    resized = sv.resize_image(
        image=frame, resolution_wh=(max_side, max_side), keep_aspect_ratio=True
    )
    pil_image = Image.fromarray(resized)
    inputs = processor(images=pil_image, return_tensors="pt").to(device, torch.float16)
    with torch.no_grad():
        start = time.time()
        outputs = model_compiled(**inputs)
        # Bring one tensor back to the host to force a device sync before
        # stopping the clock.
        outputs[0].cpu()
        elapsed = time.time() - start
    fps = 1 / elapsed
    # Post-process against the ORIGINAL frame size so boxes map back onto the
    # unresized frame.
    target_sizes = torch.tensor([frame.shape[:2]]).to(device)
    results = processor.post_process_object_detection(
        outputs=outputs,
        threshold=confidence_threshold,
        target_sizes=target_sizes,
    )
    return results, fps
121
+
122
+
123
# Gradio UI. Component creation order defines the layout and later wiring
# references earlier components, so statement order here is significant.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown("## Real Time Object Detection with compiled RT-DETR")
    gr.Markdown(
        """
This is a demo for real-time object detection using RT-DETR compiled.<br>
It runs on ZeroGPU which captures GPU every first time you infer.<br>
This combined with video processing time means that the demo inference time is slower than the model's actual inference time.<br>
The actual model average inference FPS is displayed under the processed video after inference.
"""
    )
    gr.Markdown(
        "Simply upload a video! You can also play with confidence threshold or try the examples below. 👇"
    )

    with gr.Row():
        with gr.Column():
            # Left column: the user-supplied source video.
            input_video = gr.Video(label="Input Video")
        with gr.Column():
            # Right column: annotated result plus the FPS readout, which
            # stays hidden until the first run completes.
            output_video = gr.Video(label="Output Video (5s max)")
            actual_fps = gr.Markdown("", visible=False)
    with gr.Row():
        # Inference knobs forwarded to process_video.
        conf = gr.Slider(
            label="Confidence Threshold",
            minimum=0.1,
            maximum=1.0,
            value=0.3,
            step=0.05,
        )
        max_side = gr.Slider(
            label="Image Size",
            minimum=240,
            maximum=1080,
            value=640,
            step=10,
        )
    with gr.Row():
        submit = gr.Button(variant="primary")

    # Clickable sample videos (bundled LFS files) with default settings.
    example = gr.Examples(
        examples=[
            ["./football.mp4", 0.3, 640],
            ["./cat.mp4", 0.3, 640],
            ["./safari2.mp4", 0.3, 640],
        ],
        inputs=[input_video, conf, max_side],
        outputs=output_video,
    )

    # Wire the button to the processing function; it returns both the
    # annotated video and the (now visible) FPS markdown.
    submit.click(
        fn=process_video,
        inputs=[input_video, conf, max_side],
        outputs=[output_video, actual_fps],
    )

if __name__ == "__main__":
    demo.launch(show_error=True)
cat.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07539c031a516acecf58b8751f74ba90182efe4c4ad25513038f10564739eadd
3
+ size 810095
football.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a85c5c7d5d6e0825f76a71e5e3ee2ce35c8ffbe841ef4bfa544af1089259aa
3
+ size 2855852
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ timm
3
+ requests
4
+ numpy==1.26.3
5
+ git+https://github.com/yonigozlan/transformers.git@optim-rt-detr
6
+ supervision
7
+ spaces
safari2.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c7f26f775768d06219b19acb4c071e40928f1042b7b4fa2d876095c72139e19
3
+ size 3011687