hamhanry committed
Commit 3d9d465 · 1 Parent(s): 133e1f4

add: gradio structure for user inference

app.py ADDED
@@ -0,0 +1,180 @@
+import numpy as np
+import gradio as gr
+from ultralytics import YOLO
+import tempfile
+import cv2
+
+
+def inference(image, video, model_id, image_size, conf_threshold):
+    # Load the selected checkpoint (the weight paths are left empty in this commit).
+    if model_id == "yolov10n-obb":
+        model = YOLO("")
+    elif model_id == "yolov10s-obb":
+        model = YOLO("")
+    elif model_id == "yolov10m-obb":
+        model = YOLO("")
+
+    if image is not None:
+        # Single-image path: return an RGB annotated frame and no video.
+        results = model.predict(source=image, imgsz=image_size, conf=conf_threshold, device="cpu")
+        annotated_image = results[0].plot()
+        return annotated_image[:, :, ::-1], None
+    else:
+        # Video path: copy the upload to a temporary file, run per-frame inference,
+        # and write the annotated frames to a VP9-encoded .webm for gr.Video.
+        video_path = tempfile.mktemp(suffix=".webm")
+        with open(video_path, "wb") as f:
+            with open(video, "rb") as g:
+                f.write(g.read())
+
+        cap = cv2.VideoCapture(video_path)
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+        output_video_path = tempfile.mktemp(suffix=".webm")
+        out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'vp90'), fps, (frame_width, frame_height))
+
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
+            annotated_frame = results[0].plot()
+            out.write(annotated_frame)
+
+        cap.release()
+        out.release()
+
+        return None, output_video_path
+
+
+def inference_for_examples(image, model_path, image_size, conf_threshold):
+    annotated_image, _ = inference(image, None, model_path, image_size, conf_threshold)
+    return annotated_image
+
+
+def app():
+    with gr.Blocks():
+        with gr.Row():
+            with gr.Column():
+                image = gr.Image(type="pil", label="Image", visible=True)
+                video = gr.Video(label="Video", visible=False)
+                input_type = gr.Radio(
+                    choices=["Image", "Video"],
+                    value="Image",
+                    label="Input Type",
+                )
+                model_id = gr.Dropdown(
+                    label="Model",
+                    choices=[
+                        "yolov10n-obb",
+                        "yolov10s-obb",
+                        "yolov10m-obb",
+                    ],
+                    value="yolov10n-obb",
+                )
+                image_size = gr.Slider(
+                    label="Image Size",
+                    minimum=320,
+                    maximum=1280,
+                    step=32,
+                    value=640,
+                )
+                conf_threshold = gr.Slider(
+                    label="Confidence Threshold",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.25,
+                )
+                inferBtn = gr.Button(value="Detect Phone and Camera")
+
+            with gr.Column():
+                output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
+                output_video = gr.Video(label="Annotated Video", visible=False)
+
+        # Show the image or video widgets depending on the selected input type.
+        def update_visibility(input_type):
+            image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
+            video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
+            output_image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
+            output_video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
+
+            return image, video, output_image, output_video
+
+        input_type.change(
+            fn=update_visibility,
+            inputs=[input_type],
+            outputs=[image, video, output_image, output_video],
+        )
+
+        def run_inference(image, video, model_id, image_size, conf_threshold, input_type):
+            if input_type == "Image":
+                return inference(image, None, model_id, image_size, conf_threshold)
+            else:
+                return inference(None, video, model_id, image_size, conf_threshold)
+
+        inferBtn.click(
+            fn=run_inference,
+            inputs=[image, video, model_id, image_size, conf_threshold, input_type],
+            outputs=[output_image, output_video],
+        )
+
+        gr.Examples(
+            examples=[
+                ["test_images/P0024.jpg", "yolov10n-obb", 640, 0.25],
+                ["test_images/P0035.jpg", "yolov10n-obb", 640, 0.25],
+                ["test_images/P0121.jpg", "yolov10n-obb", 640, 0.25],
+                ["test_images/P0180.jpg", "yolov10n-obb", 640, 0.25],
+                ["test_images/P0279.jpg", "yolov10n-obb", 640, 0.25],
+                ["test_images/P2112.jpg", "yolov10n-obb", 640, 0.25],
+            ],
+            fn=inference_for_examples,
+            inputs=[
+                image,
+                model_id,
+                image_size,
+                conf_threshold,
+            ],
+            outputs=[output_image],
+            cache_examples='lazy',
+        )
+
+
+gradio_app = gr.Blocks()
+with gradio_app:
+    gr.Markdown(
+        """
+        # YOLOv10 - OBB (Oriented Bounding Box)
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            app()
+
+if __name__ == '__main__':
+    gradio_app.queue()
+    gradio_app.launch()
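
For reference, the prediction path that `inference()` wraps can be exercised directly with the Ultralytics API once the empty weight paths in `app.py` are filled in. A minimal sketch, assuming a trained OBB checkpoint at a hypothetical path `weights/yolov10n-obb.pt`:

```python
# Local sanity check of the same calls app.py makes for a single image.
# The checkpoint path is an assumption; app.py leaves its weight paths empty in this commit.
import cv2
from ultralytics import YOLO

model = YOLO("weights/yolov10n-obb.pt")  # hypothetical trained OBB checkpoint
results = model.predict(source="test_images/P0024.jpg", imgsz=640, conf=0.25, device="cpu")
annotated = results[0].plot()            # BGR ndarray with the oriented boxes drawn
cv2.imwrite("annotated_P0024.jpg", annotated)
```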
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ultralytics==8.2.49
+opencv-python==4.10.0.84
+numpy==1.26.4
test_images/P0024.jpg ADDED
test_images/P0035.jpg ADDED
test_images/P0121.jpg ADDED
test_images/P0180.jpg ADDED
test_images/P0279.jpg ADDED
test_images/P2112.jpg ADDED