import json
import os
import random
import shutil
from collections import defaultdict

from PIL import Image
from tqdm import tqdm


def convert_coco_to_yolo(coco_json_path, images_dir, output_dir, class_map, split='train'):
    """Convert COCO polygon annotations to YOLO segmentation labels.

    Copies each image into the output layout and returns the set of image IDs
    that were converted successfully.
    """
    if not os.path.exists(coco_json_path):
        print(f"Warning: JSON file not found: {coco_json_path}")
        return set()
    if not os.path.exists(images_dir):
        print(f"Warning: Images directory not found: {images_dir}")
        return set()

    print(f"\nProcessing {split} split...")

    # Create output directories
    labels_dir = os.path.join(output_dir, 'labels', split)
    images_dir_out = os.path.join(output_dir, 'images', split)
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir_out, exist_ok=True)

    # Load COCO annotations
    try:
        with open(coco_json_path, 'r') as f:
            coco = json.load(f)
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON file: {coco_json_path}")
        return set()

    # Create id to filename mapping
    id_to_filename = {img['id']: img['file_name'] for img in coco['images']}

    # Group annotations by image
    img_to_anns = defaultdict(list)
    for ann in coco['annotations']:
        img_to_anns[ann['image_id']].append(ann)

    # Process each image
    processed_images = set()
    for img_id, anns in tqdm(img_to_anns.items(), desc=f"Converting {split} set"):
        img_file = id_to_filename[img_id]
        img_path = os.path.join(images_dir, img_file)
        if not os.path.exists(img_path):
            print(f"Warning: Image {img_path} not found, skipping...")
            continue
        try:
            # Copy image
            shutil.copy2(img_path, os.path.join(images_dir_out, img_file))

            # Get image dimensions
            with Image.open(img_path) as im:
                w, h = im.size

            # Convert annotations
            label_lines = []
            for ann in anns:
                cat_id = ann['category_id']
                if cat_id not in class_map:
                    print(f"Warning: Unknown category ID {cat_id} in {img_file}")
                    continue
                yolo_cls = class_map[cat_id]
                # Convert segmentation points
                for seg in ann['segmentation']:
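                    # Each polygon becomes one YOLO-seg label line,
                    # "<class> x1 y1 x2 y2 ...", with coordinates normalized to
                    # [0, 1]: even indices of the flat COCO point list are x
                    # (divided by width), odd indices are y (divided by height).
                    # This assumes polygon segmentations, not RLE dicts.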
                    coords = [str(x / w) if i % 2 == 0 else str(x / h) for i, x in enumerate(seg)]
                    label_lines.append(f"{yolo_cls} {' '.join(coords)}")

            # Write label file
            label_file = os.path.join(labels_dir, os.path.splitext(img_file)[0] + '.txt')
            with open(label_file, 'w') as f:
                f.write('\n'.join(label_lines))
            processed_images.add(img_id)
        except (IOError, OSError) as e:
            print(f"Error processing {img_file}: {e}")
            continue

    return processed_images


def create_balanced_dataset(source_json, images_dir, output_dir, class_map, min_samples=50, split='train'):
    """Create a class-balanced split by sampling up to `min_samples` images per part class."""
print(f"\nCreating balanced dataset for {split} split...")
# Create output directories
labels_dir = os.path.join(output_dir, 'labels', split)
images_dir_out = os.path.join(output_dir, 'images', split)
os.makedirs(labels_dir, exist_ok=True)
os.makedirs(images_dir_out, exist_ok=True)
# Load COCO annotations
with open(source_json, 'r') as f:
coco = json.load(f)
# Group images by parts they contain
images_by_part = defaultdict(set)
image_to_anns = defaultdict(list)
for ann in coco['annotations']:
img_id = ann['image_id']
cat_id = ann['category_id']
images_by_part[cat_id].add(img_id)
image_to_anns[img_id].append(ann)
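
    # Note: an image containing several part classes appears in several of the
    # per-class pools above; the set union below deduplicates it. Sampling is
    # unseeded, so the selection differs between runs.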
    # Sample images for balanced dataset
    selected_images = set()
    for part_images in images_by_part.values():
        sample_size = min(min_samples, len(part_images))
        selected_images.update(random.sample(list(part_images), sample_size))

    # Convert selected images to YOLO format
    id_to_filename = {img['id']: img['file_name'] for img in coco['images']}
    print(f"Processing {len(selected_images)} images for balanced {split} set...")
    for img_id in tqdm(selected_images):
        img_file = id_to_filename[img_id]
        img_path = os.path.join(images_dir, img_file)
        if not os.path.exists(img_path):
            print(f"Warning: Image {img_path} not found, skipping...")
            continue

        # Copy image
        shutil.copy2(img_path, os.path.join(images_dir_out, img_file))

        # Get image dimensions
        with Image.open(img_path) as im:
            w, h = im.size

        # Convert annotations
        label_lines = []
        for ann in image_to_anns[img_id]:
            cat_id = ann['category_id']
            if cat_id not in class_map:
                print(f"Warning: Unknown category ID {cat_id} in {img_file}")
                continue
            yolo_cls = class_map[cat_id]
            # Convert segmentation points (same normalization as convert_coco_to_yolo)
            for seg in ann['segmentation']:
                coords = [str(x / w) if i % 2 == 0 else str(x / h) for i, x in enumerate(seg)]
                label_lines.append(f"{yolo_cls} {' '.join(coords)}")

        # Write label file
        label_file = os.path.join(labels_dir, os.path.splitext(img_file)[0] + '.txt')
        with open(label_file, 'w') as f:
            f.write('\n'.join(label_lines))


def main():
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    source_dir = os.path.join(base_dir, 'damage_detection_dataset')
    if not os.path.exists(source_dir):
        print(f"Error: Source directory not found: {source_dir}")
        return

    # Set up output directories
    car_damage_dir = os.path.join(base_dir, 'data', 'data_yolo_for_training', 'car_damage_dataset')
    car_parts_dir = os.path.join(base_dir, 'data', 'data_yolo_for_training', 'car_parts_damage_dataset')
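
    # The images/{split} + labels/{split} layout written below matches what
    # Ultralytics YOLO expects. A minimal dataset YAML for training might look
    # like this (the class name is an assumption based on the mapping below):
    #   path: data/data_yolo_for_training/car_damage_dataset
    #   train: images/train
    #   val: images/val
    #   test: images/test
    #   names:
    #     0: damage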

    # Class mappings
    damage_class_map = {1: 0}  # Assuming damage is class 1 in COCO format
    parts_class_map = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4}  # headlamp, front_bumper, hood, door, rear_bumper
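    # These IDs should match the 'categories' section of each COCO JSON; a quick
    # sanity check is {c['id']: c['name'] for c in coco['categories']} on a loaded file.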

    # Process car damage dataset (full dataset)
    print("\nProcessing Car Damage Dataset...")
    for split in ['train', 'val', 'test']:
        # Only train has its own annotation file; val and test reuse the val annotations
        json_name = 'COCO_train_annos.json' if split == 'train' else 'COCO_val_annos.json'
        json_path = os.path.join(source_dir, split, json_name)
        images_dir = os.path.join(source_dir, split)
        if os.path.exists(json_path):
            convert_coco_to_yolo(
                json_path,
                images_dir,
                car_damage_dir,
                damage_class_map,
                split
            )
        else:
            print(f"Warning: JSON file not found for {split} split: {json_path}")

    # Process car parts dataset (balanced training, original val/test)
    print("\nProcessing Car Parts Dataset...")

    # Training set - balanced
    train_json = os.path.join(source_dir, 'train', 'COCO_mul_train_annos.json')
    if os.path.exists(train_json):
        create_balanced_dataset(
            train_json,
            os.path.join(source_dir, 'train'),
            car_parts_dir,
            parts_class_map,
            min_samples=50,
            split='train'
        )
    else:
        print(f"Warning: Training JSON file not found: {train_json}")

    # Validation and test sets - original
    for split in ['val', 'test']:
        json_path = os.path.join(source_dir, split, 'COCO_mul_val_annos.json')
        images_dir = os.path.join(source_dir, split)
        if os.path.exists(json_path):
            convert_coco_to_yolo(
                json_path,
                images_dir,
                car_parts_dir,
                parts_class_map,
                split
            )
        else:
            print(f"Warning: JSON file not found for {split} split: {json_path}")


if __name__ == '__main__':
    main()