Spaces:
Build error
Build error
onipot
committed on
Commit
·
c12cb7e
1
Parent(s):
13f24b5
augmented inference
Browse files
- app.py +33 -21
- yolov5/detect.py +38 -5
app.py
CHANGED
@@ -22,14 +22,14 @@ from yolov5.detect import predict, load_yolo_model
|
|
22 |
model, stride, names, pt, jit, onnx, engine = load_yolo_model("face_model.pt", imgsz=[320,320])
|
23 |
age_model_ts = torch.jit.load("age_model.pt")
|
24 |
|
25 |
-
|
26 |
|
27 |
-
|
28 |
|
29 |
-
#img_path = img
|
30 |
-
#img0 = Image.open(img_path).convert("RGB")
|
31 |
|
32 |
-
|
|
|
|
|
33 |
img0 = ImageOps.exif_transpose(img0)
|
34 |
|
35 |
draw = ImageDraw.Draw(img0)
|
@@ -37,7 +37,8 @@ def run_yolo(img0):
|
|
37 |
predictions = predict(age_model_ts, model,
|
38 |
stride, imgsz=[320, 320],
|
39 |
conf_thres=0.5, iou_thres=0.45,
|
40 |
-
source=img0
|
|
|
41 |
)
|
42 |
|
43 |
detections : list[Detection] = []
|
@@ -56,21 +57,31 @@ def run_yolo(img0):
|
|
56 |
|
57 |
detections.append(det)
|
58 |
draw.rectangle(((det.xmin, det.ymin), (det.xmax, det.ymax)), fill=None, outline=(255,255,255))
|
59 |
-
draw.rectangle(((det.xmin, det.ymin - 20), (det.xmax, det.ymin)), fill=(255,255,255))
|
60 |
-
draw.text((det.xmin, det.ymin - 20), det.class_name, fill=(0,0,0), font=roboto_font)
|
61 |
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
63 |
return img0
|
64 |
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
-
title = "AgeGuesser"
|
72 |
-
description = "Guess the age of a person from a facial image!"
|
73 |
-
article = """
|
|
|
74 |
<p><b>Links</b></p>
|
75 |
<ul>
|
76 |
<li>
|
@@ -83,9 +94,7 @@ article = """<p>A fully automated system based on YOLOv5 and EfficientNet to per
|
|
83 |
<a href='https://github.com/ai-hazard/AgeGuesser-train'>Github</a>
|
84 |
</li>
|
85 |
</ul>
|
86 |
-
|
87 |
<p>Credits to my dear colleague <a href='https://www.linkedin.com/in/nicola-marvulli-904270136/'>Dott. Nicola Marvulli</a>, we've developed AgeGuesser together as part of two university exams. (Computer Vision + Deep Learning)</p>
|
88 |
-
|
89 |
<p>Credits to my dear professors and the <a href='https://sites.google.com/site/cilabuniba/'>CILAB</a> research group</p>
|
90 |
<ul>
|
91 |
<li>
|
@@ -95,8 +104,11 @@ article = """<p>A fully automated system based on YOLOv5 and EfficientNet to per
|
|
95 |
<a href='https://sites.google.com/view/gennaro-vessio/home-page'>Prof. Gennaro Vessio</a>
|
96 |
</li>
|
97 |
</ul>
|
98 |
-
"""
|
|
|
|
|
|
|
|
|
99 |
|
100 |
-
examples = [['images/1.jpg'], ['images/2.jpg'], ['images/3.jpg'], ['images/4.jpg'], ['images/5.jpg'], ]
|
101 |
|
102 |
-
|
|
|
22 |
model, stride, names, pt, jit, onnx, engine = load_yolo_model("face_model.pt", imgsz=[320,320])
|
23 |
age_model_ts = torch.jit.load("age_model.pt")
|
24 |
|
25 |
+
text_box_height = 22
|
26 |
|
27 |
+
roboto_font = load_font(height_px=text_box_height-2)
|
28 |
|
|
|
|
|
29 |
|
30 |
+
def run_yolo(img0, with_random_augs):
|
31 |
+
|
32 |
+
img0 = ImageOps.contain(img0, (640,640))
|
33 |
img0 = ImageOps.exif_transpose(img0)
|
34 |
|
35 |
draw = ImageDraw.Draw(img0)
|
|
|
37 |
predictions = predict(age_model_ts, model,
|
38 |
stride, imgsz=[320, 320],
|
39 |
conf_thres=0.5, iou_thres=0.45,
|
40 |
+
source=img0,
|
41 |
+
with_random_augs = with_random_augs
|
42 |
)
|
43 |
|
44 |
detections : list[Detection] = []
|
|
|
57 |
|
58 |
detections.append(det)
|
59 |
draw.rectangle(((det.xmin, det.ymin), (det.xmax, det.ymax)), fill=None, outline=(255,255,255))
|
|
|
|
|
60 |
|
61 |
+
text_length = roboto_font.getlength(bbox["class"])
|
62 |
+
|
63 |
+
rect_center = (det.xmin + det.xmax - text_length) // 2
|
64 |
+
draw.rectangle(((rect_center, det.ymin), (rect_center + text_length, det.ymin + text_box_height)), fill=(255,255,255))
|
65 |
+
draw.text((rect_center, det.ymin), det.class_name, fill=(0,0,0), font=roboto_font)
|
66 |
+
|
67 |
return img0
|
68 |
|
69 |
|
70 |
+
""" img = Image.open("D:\\Download\\IMG_20220803_153335c2.jpg").convert("RGB")
|
71 |
+
run_yolo(img)
|
72 |
+
sys.exit(1) """
|
73 |
+
|
74 |
+
|
75 |
+
def main():
|
76 |
+
input = gr.Image(type='pil', label="Input Image")
|
77 |
+
outputs = gr.Image(type="pil", label="Output Image", interactive=False)
|
78 |
+
|
79 |
+
augment_preds = gr.Checkbox(label="Apply random augmentations")
|
80 |
|
81 |
+
title = "AgeGuesser"
|
82 |
+
description = "Guess the age of a person from a facial image!"
|
83 |
+
article = """
|
84 |
+
<p>A fully automated system based on YOLOv5 and EfficientNet to perform face detection and age estimation in real-time.</p>
|
85 |
<p><b>Links</b></p>
|
86 |
<ul>
|
87 |
<li>
|
|
|
94 |
<a href='https://github.com/ai-hazard/AgeGuesser-train'>Github</a>
|
95 |
</li>
|
96 |
</ul>
|
|
|
97 |
<p>Credits to my dear colleague <a href='https://www.linkedin.com/in/nicola-marvulli-904270136/'>Dott. Nicola Marvulli</a>, we've developed AgeGuesser together as part of two university exams. (Computer Vision + Deep Learning)</p>
|
|
|
98 |
<p>Credits to my dear professors and the <a href='https://sites.google.com/site/cilabuniba/'>CILAB</a> research group</p>
|
99 |
<ul>
|
100 |
<li>
|
|
|
104 |
<a href='https://sites.google.com/view/gennaro-vessio/home-page'>Prof. Gennaro Vessio</a>
|
105 |
</li>
|
106 |
</ul>
|
107 |
+
"""
|
108 |
+
|
109 |
+
examples = [['images/1.jpg'], ['images/2.jpg'], ['images/3.jpg'], ['images/4.jpg'], ['images/5.jpg'], ]
|
110 |
+
|
111 |
+
gr.Interface(run_yolo, [input, augment_preds], outputs, title=title, description=description, article=article, examples=examples, theme="huggingface").launch(enable_queue=True, ) # share=True
|
112 |
|
|
|
113 |
|
114 |
+
main()
|
yolov5/detect.py
CHANGED
@@ -21,6 +21,7 @@ from yolov5.utils.general import (check_img_size,
|
|
21 |
from yolov5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, pil_to_cv
|
22 |
from yolov5.models.common import DetectMultiBackend
|
23 |
import torchvision
|
|
|
24 |
|
25 |
test_transforms = torchvision.transforms.Compose([
|
26 |
torchvision.transforms.ToPILImage(),
|
@@ -29,6 +30,18 @@ test_transforms = torchvision.transforms.Compose([
|
|
29 |
torchvision.transforms.Resize((224, 224)),
|
30 |
])
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
def load_yolo_model(weights, device="cpu", imgsz=[1280, 1280]):
|
33 |
# Load model
|
34 |
device = select_device(device)
|
@@ -62,7 +75,7 @@ def predict(
|
|
62 |
augment=False, # augmented inference
|
63 |
visualize=False, # visualize features
|
64 |
half=False, # use FP16 half-precision inference
|
65 |
-
|
66 |
):
|
67 |
|
68 |
im, im0 = pil_to_cv(source, img_size=imgsz[0], stride=stride)
|
@@ -82,6 +95,8 @@ def predict(
|
|
82 |
|
83 |
# Process predictions
|
84 |
preds = []
|
|
|
|
|
85 |
for i, det in enumerate(pred): # per image
|
86 |
|
87 |
# im0 = im0.copy()
|
@@ -89,16 +104,34 @@ def predict(
|
|
89 |
if len(det):
|
90 |
# Rescale boxes from img_size to im0 size
|
91 |
det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
|
|
|
|
|
92 |
|
93 |
-
|
94 |
face = im0[int(xyxy[1]):int(xyxy[3]),int(xyxy[0]):int(xyxy[2])]
|
95 |
face_img = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
|
|
|
|
|
96 |
im = test_transforms(face_img).unsqueeze_(0)
|
|
|
97 |
with torch.no_grad():
|
98 |
y = age_model(im)
|
99 |
|
100 |
-
age = y[0]
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
return preds
|
|
|
21 |
from yolov5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, pil_to_cv
|
22 |
from yolov5.models.common import DetectMultiBackend
|
23 |
import torchvision
|
24 |
+
import numpy as np
|
25 |
|
26 |
test_transforms = torchvision.transforms.Compose([
|
27 |
torchvision.transforms.ToPILImage(),
|
|
|
30 |
torchvision.transforms.Resize((224, 224)),
|
31 |
])
|
32 |
|
33 |
+
|
34 |
+
test_random_transforms = torchvision.transforms.Compose([
|
35 |
+
torchvision.transforms.ToPILImage(),
|
36 |
+
torchvision.transforms.transforms.ToTensor(),
|
37 |
+
torchvision.transforms.RandomRotation((-15, 15)),
|
38 |
+
torchvision.transforms.RandomGrayscale(p=0.4),
|
39 |
+
torchvision.transforms.RandomPerspective(0.4, p=0.4),
|
40 |
+
torchvision.transforms.RandomAdjustSharpness(2),
|
41 |
+
torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
|
42 |
+
torchvision.transforms.Resize((224, 224)),
|
43 |
+
])
|
44 |
+
|
45 |
def load_yolo_model(weights, device="cpu", imgsz=[1280, 1280]):
|
46 |
# Load model
|
47 |
device = select_device(device)
|
|
|
75 |
augment=False, # augmented inference
|
76 |
visualize=False, # visualize features
|
77 |
half=False, # use FP16 half-precision inference
|
78 |
+
with_random_augs = False
|
79 |
):
|
80 |
|
81 |
im, im0 = pil_to_cv(source, img_size=imgsz[0], stride=stride)
|
|
|
95 |
|
96 |
# Process predictions
|
97 |
preds = []
|
98 |
+
|
99 |
+
|
100 |
for i, det in enumerate(pred): # per image
|
101 |
|
102 |
# im0 = im0.copy()
|
|
|
104 |
if len(det):
|
105 |
# Rescale boxes from img_size to im0 size
|
106 |
det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
|
107 |
+
|
108 |
+
for *xyxy, conf, _ in reversed(det):
|
109 |
|
110 |
+
ages = []
|
111 |
face = im0[int(xyxy[1]):int(xyxy[3]),int(xyxy[0]):int(xyxy[2])]
|
112 |
face_img = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
|
113 |
+
|
114 |
+
# inference with original crop
|
115 |
im = test_transforms(face_img).unsqueeze_(0)
|
116 |
+
|
117 |
with torch.no_grad():
|
118 |
y = age_model(im)
|
119 |
|
120 |
+
age = y[0].item()
|
121 |
+
ages.append(age)
|
122 |
+
|
123 |
+
if with_random_augs:
|
124 |
+
# inference with random augmentations
|
125 |
+
for k in range(12):
|
126 |
+
im = test_random_transforms(face_img).unsqueeze_(0)
|
127 |
+
|
128 |
+
with torch.no_grad():
|
129 |
+
y = age_model(im)
|
130 |
+
|
131 |
+
age = y[0].item()
|
132 |
+
|
133 |
+
ages.append(age)
|
134 |
+
|
135 |
+
preds.append({"class": str(int( np.mean(np.array(ages), axis=0))), "xmin": int(xyxy[0]), "ymin": int(xyxy[1]), "xmax": int(xyxy[2]),"ymax": int(xyxy[3]), "conf": float(conf)})
|
136 |
|
137 |
return preds
|