Spaces:

mascIT
/

AgeGuesser

Build error

App Files Community

onipot committed on Aug 3, 2022

Commit

c12cb7e

1 Parent(s): 13f24b5

augmented inference

Browse files

Files changed (2) hide show

app.py +33 -21
yolov5/detect.py +38 -5

app.py CHANGED Viewed

@@ -22,14 +22,14 @@ from yolov5.detect import predict, load_yolo_model
 model, stride, names, pt, jit, onnx, engine = load_yolo_model("face_model.pt", imgsz=[320,320])
 age_model_ts = torch.jit.load("age_model.pt")
-roboto_font = load_font(height_px=18)
-def run_yolo(img0):
-    #img_path = img
-    #img0 = Image.open(img_path).convert("RGB")
-    img0 = ImageOps.contain(img0, (720,720))
     img0 = ImageOps.exif_transpose(img0)
     draw = ImageDraw.Draw(img0)
@@ -37,7 +37,8 @@ def run_yolo(img0):
     predictions = predict(age_model_ts, model,
         stride, imgsz=[320, 320],
         conf_thres=0.5, iou_thres=0.45,
-        source=img0
     )
     detections : list[Detection] = []
@@ -56,21 +57,31 @@ def run_yolo(img0):
         detections.append(det)
         draw.rectangle(((det.xmin, det.ymin), (det.xmax, det.ymax)), fill=None, outline=(255,255,255))
-        draw.rectangle(((det.xmin, det.ymin - 20), (det.xmax, det.ymin)), fill=(255,255,255))
-        draw.text((det.xmin, det.ymin - 20), det.class_name, fill=(0,0,0), font=roboto_font)
-    # img0.save("img.jpg")
     return img0
-#run_yolo("D:\\Download\\IMG_20220803_153335c.jpg")
-#sys.exit(1)
-inputs = gr.inputs.Image(type='pil', label="Input Image")
-outputs = gr.outputs.Image(type="pil", label="Output Image")
-title = "AgeGuesser"
-description = "Guess the age of a person from a facial image!"
-article = """<p>A fully automated system based on YOLOv5 and EfficientNet to perform face detection and age estimation in real-time.</p>
 <p><b>Links</b></p>
 <ul>
 <li>
@@ -83,9 +94,7 @@ article = """<p>A fully automated system based on YOLOv5 and EfficientNet to per
 <a href='https://github.com/ai-hazard/AgeGuesser-train'>Github</a>
 </li>
 </ul>
 <p>Credits to my dear colleague <a href='https://www.linkedin.com/in/nicola-marvulli-904270136/'>Dott. Nicola Marvulli</a>, we've developed AgeGuesser together as part of two university exams. (Computer Vision + Deep Learning)</p>
 <p>Credits to my dear professors and the <a href='https://sites.google.com/site/cilabuniba/'>CILAB</a> research group</p>
 <ul>
 <li>
@@ -95,8 +104,11 @@ article = """<p>A fully automated system based on YOLOv5 and EfficientNet to per
 <a href='https://sites.google.com/view/gennaro-vessio/home-page'>Prof. Gennaro Vessio</a>
 </li>
 </ul>
-"""
-examples = [['images/1.jpg'], ['images/2.jpg'], ['images/3.jpg'], ['images/4.jpg'], ['images/5.jpg'], ]
-gr.Interface(run_yolo, inputs, outputs, title=title, description=description, article=article, examples=examples, theme="huggingface").launch(enable_queue=True)

 model, stride, names, pt, jit, onnx, engine = load_yolo_model("face_model.pt", imgsz=[320,320])
 age_model_ts = torch.jit.load("age_model.pt")
+text_box_height = 22
+roboto_font = load_font(height_px=text_box_height-2)
+def run_yolo(img0, with_random_augs):
+    img0 = ImageOps.contain(img0, (640,640))
     img0 = ImageOps.exif_transpose(img0)
     draw = ImageDraw.Draw(img0)
     predictions = predict(age_model_ts, model,
         stride, imgsz=[320, 320],
         conf_thres=0.5, iou_thres=0.45,
+        source=img0,
+        with_random_augs = with_random_augs
     )
     detections : list[Detection] = []
         detections.append(det)
         draw.rectangle(((det.xmin, det.ymin), (det.xmax, det.ymax)), fill=None, outline=(255,255,255))
+        text_length = roboto_font.getlength(bbox["class"])
+        rect_center = (det.xmin + det.xmax - text_length) // 2
+        draw.rectangle(((rect_center, det.ymin), (rect_center + text_length, det.ymin + text_box_height)), fill=(255,255,255))
+        draw.text((rect_center, det.ymin), det.class_name, fill=(0,0,0), font=roboto_font)
     return img0
+""" img = Image.open("D:\\Download\\IMG_20220803_153335c2.jpg").convert("RGB")
+run_yolo(img)
+sys.exit(1) """
+def main():
+    input = gr.Image(type='pil', label="Input Image")
+    outputs = gr.Image(type="pil", label="Output Image", interactive=False)
+    augment_preds = gr.Checkbox(label="Apply random augmentations")
+    title = "AgeGuesser"
+    description = "Guess the age of a person from a facial image!"
+    article = """
+<p>A fully automated system based on YOLOv5 and EfficientNet to perform face detection and age estimation in real-time.</p>
 <p><b>Links</b></p>
 <ul>
 <li>
 <a href='https://github.com/ai-hazard/AgeGuesser-train'>Github</a>
 </li>
 </ul>
 <p>Credits to my dear colleague <a href='https://www.linkedin.com/in/nicola-marvulli-904270136/'>Dott. Nicola Marvulli</a>, we've developed AgeGuesser together as part of two university exams. (Computer Vision + Deep Learning)</p>
 <p>Credits to my dear professors and the <a href='https://sites.google.com/site/cilabuniba/'>CILAB</a> research group</p>
 <ul>
 <li>
 <a href='https://sites.google.com/view/gennaro-vessio/home-page'>Prof. Gennaro Vessio</a>
 </li>
 </ul>
+    """
+    examples = [['images/1.jpg'], ['images/2.jpg'], ['images/3.jpg'], ['images/4.jpg'], ['images/5.jpg'], ]
+    gr.Interface(run_yolo, [input, augment_preds], outputs, title=title, description=description, article=article, examples=examples, theme="huggingface").launch(enable_queue=True, ) # share=True
+main()

yolov5/detect.py CHANGED Viewed

@@ -21,6 +21,7 @@ from yolov5.utils.general import (check_img_size,
 from yolov5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, pil_to_cv
 from yolov5.models.common import DetectMultiBackend
 import torchvision
 test_transforms = torchvision.transforms.Compose([
     torchvision.transforms.ToPILImage(),
@@ -29,6 +30,18 @@ test_transforms = torchvision.transforms.Compose([
     torchvision.transforms.Resize((224, 224)),
 ])
 def load_yolo_model(weights, device="cpu", imgsz=[1280, 1280]):
     # Load model
     device = select_device(device)
@@ -62,7 +75,7 @@ def predict(
         augment=False,  # augmented inference
         visualize=False,  # visualize features
         half=False,  # use FP16 half-precision inference
         ):
     im, im0 = pil_to_cv(source, img_size=imgsz[0], stride=stride)
@@ -82,6 +95,8 @@ def predict(
     # Process predictions
     preds = []
     for i, det in enumerate(pred):  # per image
         # im0 = im0.copy()
@@ -89,16 +104,34 @@ def predict(
         if len(det):
             # Rescale boxes from img_size to im0 size
             det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
-            for *xyxy, conf, cls in reversed(det):
                 face = im0[int(xyxy[1]):int(xyxy[3]),int(xyxy[0]):int(xyxy[2])]
                 face_img = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                 im = test_transforms(face_img).unsqueeze_(0)
                 with torch.no_grad():
                     y = age_model(im)
-                age = y[0]
-                preds.append({"class": str(int(age)), "xmin": int(xyxy[0]), "ymin": int(xyxy[1]), "xmax": int(xyxy[2]),"ymax": int(xyxy[3]), "conf": float(conf)})
     return preds

 from yolov5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, pil_to_cv
 from yolov5.models.common import DetectMultiBackend
 import torchvision
+import numpy as np
 test_transforms = torchvision.transforms.Compose([
     torchvision.transforms.ToPILImage(),
     torchvision.transforms.Resize((224, 224)),
 ])
+test_random_transforms = torchvision.transforms.Compose([
+    torchvision.transforms.ToPILImage(),
+    torchvision.transforms.transforms.ToTensor(),
+    torchvision.transforms.RandomRotation((-15, 15)),
+    torchvision.transforms.RandomGrayscale(p=0.4),
+    torchvision.transforms.RandomPerspective(0.4, p=0.4),
+    torchvision.transforms.RandomAdjustSharpness(2),
+    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+    torchvision.transforms.Resize((224, 224)),
+])
 def load_yolo_model(weights, device="cpu", imgsz=[1280, 1280]):
     # Load model
     device = select_device(device)
         augment=False,  # augmented inference
         visualize=False,  # visualize features
         half=False,  # use FP16 half-precision inference
+        with_random_augs = False
         ):
     im, im0 = pil_to_cv(source, img_size=imgsz[0], stride=stride)
     # Process predictions
     preds = []
     for i, det in enumerate(pred):  # per image
         # im0 = im0.copy()
         if len(det):
             # Rescale boxes from img_size to im0 size
             det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
+            for *xyxy, conf, _ in reversed(det):
+                ages = []
                 face = im0[int(xyxy[1]):int(xyxy[3]),int(xyxy[0]):int(xyxy[2])]
                 face_img = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
+                # inference with original crop
                 im = test_transforms(face_img).unsqueeze_(0)
                 with torch.no_grad():
                     y = age_model(im)
+                age = y[0].item()
+                ages.append(age)
+                if with_random_augs:
+                    # inference with random augmentations
+                    for k in range(12):
+                        im = test_random_transforms(face_img).unsqueeze_(0)
+                        with torch.no_grad():
+                            y = age_model(im)
+                        age = y[0].item()
+                        ages.append(age)
+                preds.append({"class": str(int( np.mean(np.array(ages), axis=0))), "xmin": int(xyxy[0]), "ymin": int(xyxy[1]), "xmax": int(xyxy[2]),"ymax": int(xyxy[3]), "conf": float(conf)})
     return preds