Spaces:

xiang-wuu
/

yolov5

Runtime error

App Files Files Community

glenn-jocher committed on Jun 24, 2021

Commit

f79d747

unverified ·

1 Parent(s): 417a2f4

Add optional dataset.yaml `path` attribute (#3753)

Browse files

* Add optional dataset.yaml `path` attribute

@KalenMike

* pass locals to python scripts

* handle lists

* update coco128.yaml

* Capitalize first letter

* add test key

* finalize GlobalWheat2020.yaml

* finalize objects365.yaml

* finalize SKU-110K.yaml

* finalize SKU-110K.yaml

* finalize VisDrone.yaml

* NoneType fix

* update download comment

* voc to VOC

* update

* update VOC.yaml

* update VOC.yaml

* remove dashes

* delete get_voc.sh

* force coco and coco128 to ../datasets

* Capitalize Argoverse_HD.yaml

* Capitalize Objects365.yaml

* update Argoverse_HD.yaml

* coco segments fix

* VOC single-thread

* update Argoverse_HD.yaml

* update data_dict in test handling

* create root

Files changed (17) hide show

data/Argoverse_HD.yaml +66 -0
data/GlobalWheat2020.yaml +26 -29
data/{objects365.yaml → Objects365.yaml} +12 -11
data/SKU-110K.yaml +14 -15
data/VOC.yaml +79 -0
data/VisDrone.yaml +11 -12
data/argoverse_hd.yaml +0 -21
data/coco.yaml +27 -19
data/coco128.yaml +14 -13
data/hyps/hyp.finetune.yaml +1 -1
data/scripts/get_argoverse_hd.sh +0 -61
data/scripts/get_voc.sh +0 -116
data/voc.yaml +0 -21
test.py +5 -4
train.py +1 -1
tutorial.ipynb +1 -1
utils/general.py +11 -4

data/Argoverse_HD.yaml ADDED Viewed

	@@ -0,0 +1,66 @@

+# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
+# Train command: python train.py --data Argoverse_HD.yaml
+# Default dataset location is next to YOLOv5:
+#   /parent
+#     /datasets/Argoverse
+#     /yolov5
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/Argoverse  # dataset root dir
+train: Argoverse-1.1/images/train/  # train images (relative to 'path') 39384 images
+val: Argoverse-1.1/images/val/  # val images (relative to 'path') 15062 images
+test: Argoverse-1.1/images/test/  # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
+# Classes
+nc: 8  # number of classes
+names: [ 'person',  'bicycle',  'car',  'motorcycle',  'bus',  'truck',  'traffic_light',  'stop_sign' ]  # class names
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  import json
+  from tqdm import tqdm
+  from utils.general import download, Path
+  def argoverse2yolo(set):
+      labels = {}
+      a = json.load(open(set, "rb"))
+      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
+          img_id = annot['image_id']
+          img_name = a['images'][img_id]['name']
+          img_label_name = img_name[:-3] + "txt"
+          cls = annot['category_id']  # instance class id
+          x_center, y_center, width, height = annot['bbox']
+          x_center = (x_center + width / 2) / 1920.0  # offset and scale
+          y_center = (y_center + height / 2) / 1200.0  # offset and scale
+          width /= 1920.0  # scale
+          height /= 1200.0  # scale
+          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
+          if not img_dir.exists():
+              img_dir.mkdir(parents=True, exist_ok=True)
+          k = str(img_dir / img_label_name)
+          if k not in labels:
+              labels[k] = []
+          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
+      for k in labels:
+          with open(k, "w") as f:
+              f.writelines(labels[k])
+  # Download
+  dir = Path('../datasets/Argoverse')  # dataset root dir
+  urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
+  download(urls, dir=dir, delete=False)
+  # Convert
+  annotations_dir = 'Argoverse-HD/annotations/'
+  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
+  for d in "train.json", "val.json":
+      argoverse2yolo(dir / annotations_dir / d)  # convert VisDrone annotations to YOLO labels

data/GlobalWheat2020.yaml CHANGED Viewed

@@ -1,43 +1,40 @@
 # Global Wheat 2020 dataset http://www.global-wheat.com/
 # Train command: python train.py --data GlobalWheat2020.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
 #     /datasets/GlobalWheat2020
 #     /yolov5
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: # 3422 images
-  - ../datasets/GlobalWheat2020/images/arvalis_1
-  - ../datasets/GlobalWheat2020/images/arvalis_2
-  - ../datasets/GlobalWheat2020/images/arvalis_3
-  - ../datasets/GlobalWheat2020/images/ethz_1
-  - ../datasets/GlobalWheat2020/images/rres_1
-  - ../datasets/GlobalWheat2020/images/inrae_1
-  - ../datasets/GlobalWheat2020/images/usask_1
-val: # 748 images (WARNING: train set contains ethz_1)
-  - ../datasets/GlobalWheat2020/images/ethz_1
-test: # 1276 images
-  - ../datasets/GlobalWheat2020/images/utokyo_1
-  - ../datasets/GlobalWheat2020/images/utokyo_2
-  - ../datasets/GlobalWheat2020/images/nau_1
-  - ../datasets/GlobalWheat2020/images/uq_1
-# number of classes
-nc: 1
-# class names
-names: [ 'wheat_head' ]
-# download command/URL (optional) --------------------------------------------------------------------------------------
 download: |
   from utils.general import download, Path
   # Download
-  dir = Path('../datasets/GlobalWheat2020')  # dataset directory
   urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
   download(urls, dir=dir)

 # Global Wheat 2020 dataset http://www.global-wheat.com/
 # Train command: python train.py --data GlobalWheat2020.yaml
 # Default dataset location is next to YOLOv5:
+#   /parent
 #     /datasets/GlobalWheat2020
 #     /yolov5
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/GlobalWheat2020  # dataset root dir
+train: # train images (relative to 'path') 3422 images
+  - images/arvalis_1
+  - images/arvalis_2
+  - images/arvalis_3
+  - images/ethz_1
+  - images/rres_1
+  - images/inrae_1
+  - images/usask_1
+val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
+  - images/ethz_1
+test: # test images (optional) 1276 images
+  - images/utokyo_1
+  - images/utokyo_2
+  - images/nau_1
+  - images/uq_1
+# Classes
+nc: 1  # number of classes
+names: [ 'wheat_head' ]  # class names
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from utils.general import download, Path
   # Download
+  dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
   download(urls, dir=dir)

data/{objects365.yaml → Objects365.yaml} RENAMED Viewed

@@ -1,18 +1,19 @@
 # Objects365 dataset https://www.objects365.org/
-# Train command: python train.py --data objects365.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /datasets/objects365
 #     /yolov5
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../datasets/objects365/images/train  # 1742289 images
-val: ../datasets/objects365/images/val # 5570 images
-# number of classes
-nc: 365
-# class names
 names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
          'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
          'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
@@ -56,7 +57,7 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl
          'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]
-# download command/URL (optional) --------------------------------------------------------------------------------------
 download: |
   from pycocotools.coco import COCO
   from tqdm import tqdm
@@ -64,7 +65,7 @@ download: |
   from utils.general import download, Path
   # Make Directories
-  dir = Path('../datasets/objects365')  # dataset directory
   for p in 'images', 'labels':
       (dir / p).mkdir(parents=True, exist_ok=True)
       for q in 'train', 'val':

 # Objects365 dataset https://www.objects365.org/
+# Train command: python train.py --data Objects365.yaml
 # Default dataset location is next to YOLOv5:
+#   /parent
+#     /datasets/Objects365
 #     /yolov5
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/Objects365  # dataset root dir
+train: images/train  # train images (relative to 'path') 1742289 images
+val: images/val # val images (relative to 'path') 5570 images
+test:  # test images (optional)
+# Classes
+nc: 365  # number of classes
 names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
          'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
          'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
          'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from pycocotools.coco import COCO
   from tqdm import tqdm
   from utils.general import download, Path
   # Make Directories
+  dir = Path(yaml['path'])  # dataset root dir
   for p in 'images', 'labels':
       (dir / p).mkdir(parents=True, exist_ok=True)
       for q in 'train', 'val':

data/SKU-110K.yaml CHANGED Viewed

@@ -1,39 +1,38 @@
 # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
 # Train command: python train.py --data SKU-110K.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
 #     /datasets/SKU-110K
 #     /yolov5
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../datasets/SKU-110K/train.txt  # 8219 images
-val: ../datasets/SKU-110K/val.txt  # 588 images
-test: ../datasets/SKU-110K/test.txt  # 2936 images
-# number of classes
-nc: 1
-# class names
-names: [ 'object' ]
-# download command/URL (optional) --------------------------------------------------------------------------------------
 download: |
   import shutil
   from tqdm import tqdm
   from utils.general import np, pd, Path, download, xyxy2xywh
   # Download
-  datasets = Path('../datasets')  # download directory
   urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
-  download(urls, dir=datasets, delete=False)
   # Rename directories
-  dir = (datasets / 'SKU-110K')
   if dir.exists():
       shutil.rmtree(dir)
-  (datasets / 'SKU110K_fixed').rename(dir)  # rename dir
   (dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir
   # Convert labels

 # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
 # Train command: python train.py --data SKU-110K.yaml
 # Default dataset location is next to YOLOv5:
+#   /parent
 #     /datasets/SKU-110K
 #     /yolov5
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/SKU-110K  # dataset root dir
+train: train.txt  # train images (relative to 'path')  8219 images
+val: val.txt  # val images (relative to 'path')  588 images
+test: test.txt  # test images (optional)  2936 images
+# Classes
+nc: 1  # number of classes
+names: [ 'object' ]  # class names
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   import shutil
   from tqdm import tqdm
   from utils.general import np, pd, Path, download, xyxy2xywh
   # Download
+  dir = Path(yaml['path'])  # dataset root dir
+  parent = Path(dir.parent)  # download dir
   urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
+  download(urls, dir=parent, delete=False)
   # Rename directories
   if dir.exists():
       shutil.rmtree(dir)
+  (parent / 'SKU110K_fixed').rename(dir)  # rename dir
   (dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir
   # Convert labels

data/VOC.yaml ADDED Viewed

	@@ -0,0 +1,79 @@

+# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
+# Train command: python train.py --data VOC.yaml
+# Default dataset location is next to YOLOv5:
+#   /parent
+#     /datasets/VOC
+#     /yolov5
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/VOC
+train: # train images (relative to 'path')  16551 images
+  - images/train2012
+  - images/train2007
+  - images/val2012
+  - images/val2007
+val: # val images (relative to 'path')  4952 images
+  - images/test2007
+test: # test images (optional)
+  - images/test2007
+# Classes
+nc: 20  # number of classes
+names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
+         'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]  # class names
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  import xml.etree.ElementTree as ET
+  from tqdm import tqdm
+  from utils.general import download, Path
+  def convert_label(path, lb_path, year, image_id):
+      def convert_box(size, box):
+          dw, dh = 1. / size[0], 1. / size[1]
+          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
+          return x * dw, y * dh, w * dw, h * dh
+      in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
+      out_file = open(lb_path, 'w')
+      tree = ET.parse(in_file)
+      root = tree.getroot()
+      size = root.find('size')
+      w = int(size.find('width').text)
+      h = int(size.find('height').text)
+      for obj in root.iter('object'):
+          cls = obj.find('name').text
+          if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
+              xmlbox = obj.find('bndbox')
+              bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
+              cls_id = yaml['names'].index(cls)  # class id
+              out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
+  # Download
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
+          url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
+          url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
+  download(urls, dir=dir / 'images', delete=False)
+  # Convert
+  path = dir / f'images/VOCdevkit'
+  for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
+      imgs_path = dir / 'images' / f'{image_set}{year}'
+      lbs_path = dir / 'labels' / f'{image_set}{year}'
+      imgs_path.mkdir(exist_ok=True, parents=True)
+      lbs_path.mkdir(exist_ok=True, parents=True)
+      image_ids = open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read().strip().split()
+      for id in tqdm(image_ids, desc=f'{image_set}{year}'):
+          f = path / f'VOC{year}/JPEGImages/{id}.jpg'  # old img path
+          lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
+          f.rename(imgs_path / f.name)  # move image
+          convert_label(path, lb_path, year, id)  # convert labels to YOLO format

data/VisDrone.yaml CHANGED Viewed

@@ -1,24 +1,23 @@
 # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
 # Train command: python train.py --data VisDrone.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /VisDrone
 #     /yolov5
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../VisDrone/VisDrone2019-DET-train/images  # 6471 images
-val: ../VisDrone/VisDrone2019-DET-val/images  # 548 images
-test: ../VisDrone/VisDrone2019-DET-test-dev/images  # 1610 images
-# number of classes
-nc: 10
-# class names
 names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
-# download command/URL (optional) --------------------------------------------------------------------------------------
 download: |
   from utils.general import download, os, Path
@@ -49,7 +48,7 @@ download: |
   # Download
-  dir = Path('../VisDrone')  # dataset directory
   urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',

 # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
 # Train command: python train.py --data VisDrone.yaml
 # Default dataset location is next to YOLOv5:
+#   /parent
+#     /datasets/VisDrone
 #     /yolov5
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/VisDrone  # dataset root dir
+train: VisDrone2019-DET-train/images  # train images (relative to 'path')  6471 images
+val: VisDrone2019-DET-val/images  # val images (relative to 'path')  548 images
+test: VisDrone2019-DET-test-dev/images  # test images (optional)  1610 images
+# Classes
+nc: 10  # number of classes
 names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from utils.general import download, os, Path
   # Download
+  dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',

data/argoverse_hd.yaml DELETED Viewed

@@ -1,21 +0,0 @@
-# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
-# Train command: python train.py --data argoverse_hd.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /argoverse
-#     /yolov5
-# download command/URL (optional)
-download: bash data/scripts/get_argoverse_hd.sh
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../argoverse/Argoverse-1.1/images/train/  # 39384 images
-val: ../argoverse/Argoverse-1.1/images/val/  # 15062 iamges
-test: ../argoverse/Argoverse-1.1/images/test/  # Submit to: https://eval.ai/web/challenges/challenge-page/800/overview
-# number of classes
-nc: 8
-# class names
-names: [ 'person',  'bicycle',  'car',  'motorcycle',  'bus',  'truck',  'traffic_light',  'stop_sign' ]

data/coco.yaml CHANGED Viewed

@@ -1,23 +1,19 @@
 # COCO 2017 dataset http://cocodataset.org
 # Train command: python train.py --data coco.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /coco
 #     /yolov5
-# download command/URL (optional)
-download: bash data/scripts/get_coco.sh
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../coco/train2017.txt  # 118287 images
-val: ../coco/val2017.txt  # 5000 images
-test: ../coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
-# number of classes
-nc: 80
-# class names
 names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
@@ -26,10 +22,22 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
-         'hair drier', 'toothbrush' ]
-# Print classes
-# with open('data/coco.yaml') as f:
-#   d = yaml.safe_load(f)  # dict
-#   for i, x in enumerate(d['names']):
-#     print(i, x)

 # COCO 2017 dataset http://cocodataset.org
 # Train command: python train.py --data coco.yaml
 # Default dataset location is next to YOLOv5:
+#   /parent
+#     /datasets/coco
 #     /yolov5
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco  # dataset root dir
+train: train2017.txt  # train images (relative to 'path') 118287 images
+val: val2017.txt  # val images (relative to 'path') 5000 images
+test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+# Classes
+nc: 80  # number of classes
 names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+         'hair drier', 'toothbrush' ]  # class names
+# Download script/URL (optional)
+download: |
+  from utils.general import download, Path
+  # Download labels
+  segments = False  # segment or box labels
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
+  download(urls, dir=dir.parent)
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)

data/coco128.yaml CHANGED Viewed

@@ -1,22 +1,19 @@
 # COCO 2017 dataset http://cocodataset.org - first 128 training images
 # Train command: python train.py --data coco128.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /coco128
 #     /yolov5
-# download command/URL (optional)
-download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../coco128/images/train2017/  # 128 images
-val: ../coco128/images/train2017/  # 128 images
-# number of classes
-nc: 80
-# class names
 names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
@@ -25,4 +22,8 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
-         'hair drier', 'toothbrush' ]

 # COCO 2017 dataset http://cocodataset.org - first 128 training images
 # Train command: python train.py --data coco128.yaml
 # Default dataset location is next to YOLOv5:
+#   /parent
+#     /datasets/coco128
 #     /yolov5
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco128  # dataset root dir
+train: images/train2017  # train images (relative to 'path') 128 images
+val: images/train2017  # val images (relative to 'path') 128 images
+test:  # test images (optional)
+# Classes
+nc: 80  # number of classes
 names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+         'hair drier', 'toothbrush' ]  # class names
+# Download script/URL (optional)
+download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip

data/hyps/hyp.finetune.yaml CHANGED Viewed

@@ -1,5 +1,5 @@
 # Hyperparameters for VOC finetuning
-# python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials

 # Hyperparameters for VOC finetuning
+# python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50
 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials

data/scripts/get_argoverse_hd.sh DELETED Viewed

@@ -1,61 +0,0 @@
-#!/bin/bash
-# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
-# Download command: bash data/scripts/get_argoverse_hd.sh
-# Train command: python train.py --data argoverse_hd.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /argoverse
-#     /yolov5
-# Download/unzip images
-d='../argoverse/' # unzip directory
-mkdir $d
-url=https://argoverse-hd.s3.us-east-2.amazonaws.com/
-f=Argoverse-HD-Full.zip
-curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &# download, unzip, remove in background
-wait                                              # finish background tasks
-cd ../argoverse/Argoverse-1.1/
-ln -s tracking images
-cd ../Argoverse-HD/annotations/
-python3 - "$@" <<END
-import json
-from pathlib import Path
-annotation_files = ["train.json", "val.json"]
-print("Converting annotations to YOLOv5 format...")
-for val in annotation_files:
-    a = json.load(open(val, "rb"))
-    label_dict = {}
-    for annot in a['annotations']:
-        img_id = annot['image_id']
-        img_name = a['images'][img_id]['name']
-        img_label_name = img_name[:-3] + "txt"
-        cls = annot['category_id']  # instance class id
-        x_center, y_center, width, height = annot['bbox']
-        x_center = (x_center + width / 2) / 1920.  # offset and scale
-        y_center = (y_center + height / 2) / 1200.  # offset and scale
-        width /= 1920.  # scale
-        height /= 1200.  # scale
-        img_dir = "./labels/" + a['seq_dirs'][a['images'][annot['image_id']]['sid']]
-        Path(img_dir).mkdir(parents=True, exist_ok=True)
-        if img_dir + "/" + img_label_name not in label_dict:
-            label_dict[img_dir + "/" + img_label_name] = []
-        label_dict[img_dir + "/" + img_label_name].append(f"{cls} {x_center} {y_center} {width} {height}\n")
-    for filename in label_dict:
-        with open(filename, "w") as file:
-            for string in label_dict[filename]:
-                file.write(string)
-END
-mv ./labels ../../Argoverse-1.1/

data/scripts/get_voc.sh DELETED Viewed

@@ -1,116 +0,0 @@
-#!/bin/bash
-# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
-# Download command: bash data/scripts/get_voc.sh
-# Train command: python train.py --data voc.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /VOC
-#     /yolov5
-start=$(date +%s)
-mkdir -p ../tmp
-cd ../tmp/
-# Download/unzip images and labels
-d='.' # unzip directory
-url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
-f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
-f2=VOCtest_06-Nov-2007.zip     # 438MB, 4953 images
-f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
-for f in $f3 $f2 $f1; do
-  echo 'Downloading' $url$f '...'
-  curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
-done
-wait # finish background tasks
-end=$(date +%s)
-runtime=$((end - start))
-echo "Completed in" $runtime "seconds"
-echo "Splitting dataset..."
-python3 - "$@" <<END
-import os
-import xml.etree.ElementTree as ET
-from os import getcwd
-sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
-classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
-           "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
-def convert_box(size, box):
-    dw = 1. / (size[0])
-    dh = 1. / (size[1])
-    x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
-    return x * dw, y * dh, w * dw, h * dh
-def convert_annotation(year, image_id):
-    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
-    out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
-    tree = ET.parse(in_file)
-    root = tree.getroot()
-    size = root.find('size')
-    w = int(size.find('width').text)
-    h = int(size.find('height').text)
-    for obj in root.iter('object'):
-        difficult = obj.find('difficult').text
-        cls = obj.find('name').text
-        if cls not in classes or int(difficult) == 1:
-            continue
-        cls_id = classes.index(cls)
-        xmlbox = obj.find('bndbox')
-        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
-             float(xmlbox.find('ymax').text))
-        bb = convert_box((w, h), b)
-        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
-cwd = getcwd()
-for year, image_set in sets:
-    if not os.path.exists('VOCdevkit/VOC%s/labels/' % year):
-        os.makedirs('VOCdevkit/VOC%s/labels/' % year)
-    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
-    list_file = open('%s_%s.txt' % (year, image_set), 'w')
-    for image_id in image_ids:
-        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (cwd, year, image_id))
-        convert_annotation(year, image_id)
-    list_file.close()
-END
-cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
-cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
-mkdir ../VOC ../VOC/images ../VOC/images/train ../VOC/images/val
-mkdir ../VOC/labels ../VOC/labels/train ../VOC/labels/val
-python3 - "$@" <<END
-import os
-print(os.path.exists('../tmp/train.txt'))
-with open('../tmp/train.txt', 'r') as f:
-    for line in f.readlines():
-        line = "/".join(line.split('/')[-5:]).strip()
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/images/train")
-        line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/labels/train")
-print(os.path.exists('../tmp/2007_test.txt'))
-with open('../tmp/2007_test.txt', 'r') as f:
-    for line in f.readlines():
-        line = "/".join(line.split('/')[-5:]).strip()
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/images/val")
-        line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/labels/val")
-END
-rm -rf ../tmp # remove temporary directory
-echo "VOC download done."

data/voc.yaml DELETED Viewed

@@ -1,21 +0,0 @@
-# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
-# Train command: python train.py --data voc.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /VOC
-#     /yolov5
-# download command/URL (optional)
-download: bash data/scripts/get_voc.sh
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../VOC/images/train/  # 16551 images
-val: ../VOC/images/val/  # 4952 images
-# number of classes
-nc: 20
-# class names
-names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
-         'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]

test.py CHANGED Viewed

@@ -76,6 +76,11 @@ def run(data,
         # if device.type != 'cpu' and torch.cuda.device_count() > 1:
         #     model = nn.DataParallel(model)
     # Half
     half &= device.type != 'cpu'  # half precision only supported on CUDA
     if half:
@@ -83,10 +88,6 @@ def run(data,
     # Configure
     model.eval()
-    if isinstance(data, str):
-        with open(data) as f:
-            data = yaml.safe_load(f)
-    check_dataset(data)  # check
     is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt')  # COCO dataset
     nc = 1 if single_cls else int(data['nc'])  # number of classes
     iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95

         # if device.type != 'cpu' and torch.cuda.device_count() > 1:
         #     model = nn.DataParallel(model)
+        # Data
+        with open(data) as f:
+            data = yaml.safe_load(f)
+        check_dataset(data)  # check
     # Half
     half &= device.type != 'cpu'  # half precision only supported on CUDA
     if half:
     # Configure
     model.eval()
     is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt')  # COCO dataset
     nc = 1 if single_cls else int(data['nc'])  # number of classes
     iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95

train.py CHANGED Viewed

@@ -453,7 +453,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         if not evolve:
             if is_coco:  # COCO dataset
                 for m in [last, best] if best.exists() else [last]:  # speed, mAP tests
-                    results, _, _ = test.run(data,
                                              batch_size=batch_size // WORLD_SIZE * 2,
                                              imgsz=imgsz_test,
                                              conf_thres=0.001,

         if not evolve:
             if is_coco:  # COCO dataset
                 for m in [last, best] if best.exists() else [last]:  # speed, mAP tests
+                    results, _, _ = test.run(data_dict,
                                              batch_size=batch_size // WORLD_SIZE * 2,
                                              imgsz=imgsz_test,
                                              conf_thres=0.001,

tutorial.ipynb CHANGED Viewed

@@ -1255,7 +1255,7 @@
       "source": [
         "# VOC\n",
         "for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']):  # zip(batch_size, model)\n",
-        "  !python train.py --batch {b} --weights {m}.pt --data voc.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
       ],
       "execution_count": null,
       "outputs": []

       "source": [
         "# VOC\n",
         "for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']):  # zip(batch_size, model)\n",
+        "  !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
       ],
       "execution_count": null,
       "outputs": []

utils/general.py CHANGED Viewed

@@ -222,9 +222,14 @@ def check_file(file):
 def check_dataset(data, autodownload=True):
     # Download dataset if not found locally
-    val, s = data.get('val'), data.get('download')
     if val:
-        root = Path(val).parts[0] + os.sep  # unzip directory i.e. '../'
         val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
         if not all(x.exists() for x in val):
             print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
@@ -233,12 +238,14 @@ def check_dataset(data, autodownload=True):
                     f = Path(s).name  # filename
                     print(f'Downloading {s} ...')
                     torch.hub.download_url_to_file(s, f)
                     r = os.system(f'unzip -q {f} -d {root} && rm {f}')  # unzip
                 elif s.startswith('bash '):  # bash script
                     print(f'Running {s} ...')
                     r = os.system(s)
                 else:  # python script
-                    r = exec(s)  # return None
                 print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure'))  # print result
             else:
                 raise Exception('Dataset not found.')
@@ -258,7 +265,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
         if unzip and f.suffix in ('.zip', '.gz'):
             print(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                s = f'unzip -qo {f} -d {dir} && rm {f}'  # unzip -quiet -overwrite
             elif f.suffix == '.gz':
                 s = f'tar xfz {f} --directory {f.parent}'  # unzip
             if delete:  # delete zip file after unzip

 def check_dataset(data, autodownload=True):
     # Download dataset if not found locally
+    path = Path(data.get('path', ''))  # optional 'path' field
+    if path:
+        for k in 'train', 'val', 'test':
+            if data.get(k):  # prepend path
+                data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
+    train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
     if val:
         val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
         if not all(x.exists() for x in val):
             print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
                     f = Path(s).name  # filename
                     print(f'Downloading {s} ...')
                     torch.hub.download_url_to_file(s, f)
+                    root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
+                    Path(root).mkdir(parents=True, exist_ok=True)  # create root
                     r = os.system(f'unzip -q {f} -d {root} && rm {f}')  # unzip
                 elif s.startswith('bash '):  # bash script
                     print(f'Running {s} ...')
                     r = os.system(s)
                 else:  # python script
+                    r = exec(s, {'yaml': data})  # return None
                 print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure'))  # print result
             else:
                 raise Exception('Dataset not found.')
         if unzip and f.suffix in ('.zip', '.gz'):
             print(f'Unzipping {f}...')
             if f.suffix == '.zip':
+                s = f'unzip -qo {f} -d {dir}'  # unzip -quiet -overwrite
             elif f.suffix == '.gz':
                 s = f'tar xfz {f} --directory {f.parent}'  # unzip
             if delete:  # delete zip file after unzip