import os
import yaml
import fiftyone as fo
import fiftyone.utils.random as four
import fiftyone.utils.huggingface as fouh
from ultralytics import YOLO

# IMPLEMENT YOUR FUNCTIONS FOR DATA CURATION HERE; BELOW ARE JUST DUMMY FUNCTIONS AS EXAMPLES
def shuffle_data(dataset):
    """Shuffle the dataset"""
    return dataset.shuffle(seed=51)


def take_random_sample(dataset):
    """Take a random sample from the dataset"""
    return dataset.take(size=10, seed=51)
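

# A slightly more realistic curation example (a hedged sketch, not part of the
# original template): rank samples by a "uniqueness" score and keep only the
# most distinctive ones. This assumes the "uniqueness" field has already been
# populated, e.g. via fiftyone.brain.compute_uniqueness(dataset).
def keep_most_unique(dataset, num_samples=1000):
    """Keep the `num_samples` samples with the highest uniqueness score."""
    return dataset.sort_by("uniqueness", reverse=True).limit(num_samples)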
# DEFINE YOUR TRAINING HYPERPARAMETERS IN THIS DICTIONARY
training_config = {
    # Dataset split
    "train_split": 0.9,
    "val_split": 0.1,

    # Training parameters
    "train_params": {
        "epochs": 1,
        "batch": 16,
        "imgsz": 640,
        "lr0": 0.01,
        "lrf": 0.01,
    },
}
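
# The keys in "train_params" are passed straight through to Ultralytics'
# model.train(), so other standard Ultralytics training arguments can be added
# to that dictionary as well. A hedged example (values are illustrative only):
#
#   "train_params": {
#       "epochs": 50,
#       "batch": 16,
#       "imgsz": 640,
#       "lr0": 0.01,
#       "lrf": 0.01,
#       "patience": 10,       # early-stopping patience, in epochs
#       "optimizer": "AdamW", # optimizer selection
#   }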
# WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
def prepare_dataset():
    """
    Prepare the dataset for model training.

    NOTE: There are lines in this function that you must not modify. They are
    marked with "DO NOT MODIFY".

    Returns:
        fiftyone.core.dataset.Dataset: The curated dataset.

    Note:
        The "DO NOT MODIFY" lines below must not be removed from your
        submission: they ensure that only the approved dataset
        ("Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set") is used for
        the competition.
    """
    # DO NOT MODIFY THIS LINE
    dataset = fouh.load_from_hub("/tmp/data/train")

    # WRAP YOUR DATA CURATION FUNCTIONS HERE
    dataset = shuffle_data(dataset)
    dataset = take_random_sample(dataset)

    # DO NOT MODIFY BELOW THIS LINE
    curated_dataset = dataset.clone(name="curated_dataset")
    curated_dataset.persistent = True
    return curated_dataset
# DO NOT MODIFY THIS FUNCTION
def export_to_yolo_format(
    samples,
    classes,
    label_field="ground_truth",
    export_dir=".",
    splits=["train", "val"],
):
    """
    Export samples to YOLO format, optionally handling multiple data splits.

    NOTE: DO NOT MODIFY THIS FUNCTION.

    Args:
        samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
        classes (list): A list of class names for the YOLO format.
        label_field (str, optional): The field in the samples that contains the labels.
            Defaults to "ground_truth".
        export_dir (str, optional): The directory where the exported data will be saved.
            Defaults to the current directory.
        splits (str, list, optional): The split(s) to export. Can be a single split name (str)
            or a list of split names. If None, all samples are exported as the "val" split.
            Defaults to ["train", "val"].

    Returns:
        None
    """
    if splits is None:
        splits = ["val"]
    elif isinstance(splits, str):
        splits = [splits]

    for split in splits:
        split_view = samples if split == "val" and splits == ["val"] else samples.match_tags(split)
        split_view.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field=label_field,
            classes=classes,
            split=split,
        )
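
# A minimal usage sketch of the export helper (a hedged example, not invoked by
# the pipeline): the dataset name and export directory below are hypothetical,
# and the samples are assumed to already carry "train"/"val" tags.
#
#   dataset = fo.load_dataset("curated_dataset")
#   export_to_yolo_format(
#       samples=dataset,
#       classes=dataset.default_classes,
#       export_dir="/tmp/yolo_export",
#       splits=["train", "val"],
#   )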
# DO NOT MODIFY THIS FUNCTION
def train_model(training_config=training_config):
    """
    Train the YOLO model on the given dataset using the provided configuration.

    NOTE: DO NOT MODIFY THIS FUNCTION AT ALL OR YOUR SCRIPT WILL FAIL.
    """
    training_dataset = prepare_dataset()

    print("Splitting the dataset...")
    four.random_split(
        training_dataset,
        {"train": training_config["train_split"], "val": training_config["val_split"]},
    )
    print("Dataset split completed.")

    print("Exporting dataset to YOLO format...")
    export_to_yolo_format(
        samples=training_dataset,
        classes=training_dataset.default_classes,
    )
    print("Dataset export completed.")

    print("Initializing the YOLO model...")
    # DO NOT MODIFY THIS LINE
    model = YOLO(
        model="/tmp/data/yolo11m.pt",
    )
    print("Model initialized.")

    print("Starting model training...")
    results = model.train(
        data="dataset.yaml",
        **training_config["train_params"],
    )
    print("Model training completed.")

    best_model_path = str(results.save_dir / "weights/best.pt")
    print(f"Best model saved to: {best_model_path}")


# DO NOT MODIFY THE BELOW
if __name__ == "__main__":
    train_model()