Commit a24e92a
Parent(s): 47bee20

updates

Files changed:
- data_curation.py +0 -62
- script.py +18 -5
- training_config.yaml +0 -11
data_curation.py
DELETED
@@ -1,62 +0,0 @@
-"""
-This script is used to curate the data for the project.
-
-Implement your functions to clean the data and prepare it for model training.
-
-Note: the competition requires that you use FiftyOne for data curation and you are only allowed to
-use the approved dataset from the hub, Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set, which can
-be found here: https://huggingface.co/datasets/Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set
-"""
-
-import fiftyone as fo
-import fiftyone.utils.huggingface as fouh
-
-# Implement functions for data curation. Below are just dummy functions as examples
-
-def shuffle_data(dataset):
-    """Shuffle the dataset"""
-    return dataset.shuffle(seed=51)
-
-def take_random_sample(dataset):
-    """Take a sample from the dataset"""
-    return dataset.take(size=10, seed=51)
-
-def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
-    """
-    Prepare the dataset for model training.
-
-    Args:
-        name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
-
-    Returns:
-        fiftyone.core.dataset.Dataset: The curated dataset.
-
-    Raises:
-        ValueError: If the provided dataset name is not the approved one.
-
-    Note:
-        The following code block MUST NOT be removed from your submission:
-
-        APPROVED_DATASET = "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"
-
-        if name != APPROVED_DATASET:
-            raise ValueError(f"Only the approved dataset '{APPROVED_DATASET}' is allowed for this competition.")
-
-        This ensures that only the approved dataset is used for the competition.
-    """
-    APPROVED_DATASET = "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"
-
-    if name != APPROVED_DATASET:
-        raise ValueError(f"Only the approved dataset '{APPROVED_DATASET}' is allowed for this competition.")
-
-    # Load the approved dataset from the hub
-    dataset = fouh.load_from_hub(name, split="train")
-
-    # Implement your data curation functions here
-    dataset = shuffle_data(dataset)
-    dataset = take_random_sample(dataset)
-
-    # Return the curated dataset
-    curated_dataset = dataset.clone(name="curated_dataset")
-
-    curated_dataset.persistent = True
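
Note (not part of the commit): the deleted prepare_dataset builds curated_dataset but never returns it, so a caller would receive None. A minimal sketch of a version that returns the curated dataset, reusing only names that appear in the deleted file, might look like this:

import fiftyone.utils.huggingface as fouh

APPROVED_DATASET = "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"

def prepare_dataset(name=APPROVED_DATASET):
    """Load the approved dataset, apply curation steps, and return the result."""
    if name != APPROVED_DATASET:
        raise ValueError(f"Only the approved dataset '{APPROVED_DATASET}' is allowed for this competition.")

    # Load the approved dataset from the Hugging Face Hub
    dataset = fouh.load_from_hub(name, split="train")

    # Placeholder curation steps, mirroring the dummy functions in the deleted file
    dataset = dataset.shuffle(seed=51)
    dataset = dataset.take(10, seed=51)

    # Clone the curated view into a persistent dataset and return it
    curated_dataset = dataset.clone(name="curated_dataset")
    curated_dataset.persistent = True
    return curated_dataset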
script.py
CHANGED
@@ -49,7 +49,9 @@ training_config = {
 # WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
 def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
     """
-    Prepare the dataset for model training.
+    Prepare the dataset for model training.
+
+    NOTE: There are lines you must not modify in this function. They are marked with "DO NOT MODIFY".
 
     Args:
         name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
@@ -86,6 +88,8 @@ def export_to_yolo_format(
     """
     Export samples to YOLO format, optionally handling multiple data splits.
 
+    NOTE: DO NOT MODIFY THIS FUNCTION.
+
     Args:
         samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
         export_dir (str): The directory where the exported data will be saved.
@@ -117,9 +121,11 @@ def export_to_yolo_format(
     )
 
 # DO NOT MODIFY THIS FUNCTION
-def train_model(training_config):
+def train_model(training_config=training_config):
     """
     Train the YOLO model on the given dataset using the provided configuration.
+
+    NOTE: DO NOT MODIFY THIS FUNCTION.
     """
 
     script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -132,32 +138,39 @@ def train_model(training_config):
     training_dataset = prepare_dataset()
 
     print("Splitting the dataset...")
+
     four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
+
     print("Dataset split completed.")
 
     print("Exporting dataset to YOLO format...")
+
     export_to_yolo_format(
         samples=training_dataset,
         classes=training_dataset.default_classes,
     )
+
     print("Dataset export completed.")
 
     print("Initializing the YOLO model...")
+
     model = YOLO("yolov10m.pt")
+
     print("Model initialized.")
 
     print("Starting model training...")
+
     results = model.train(
         data="dataset.yaml",
         **training_config['train_params']
     )
+
     print("Model training completed.")
 
     best_model_path = str(results.save_dir / "weights/best.pt")
-    print(f"Best model path: {best_model_path}")
-    best_model = YOLO(best_model_path)
-    print("Best model loaded.")
 
     print(f"Best model saved to: {best_model_path}")
+
+# DO NOT MODIFY THE BELOW
 if __name__=="__main__":
     train_model()
training_config.yaml
DELETED
@@ -1,11 +0,0 @@
-# Dataset split
-train_split: 0.9
-val_split: 0.1
-
-# Training parameters
-train_params:
-  epochs: 1
-  batch: 16
-  imgsz: 640
-  lr0: 0.01
-  lrf: 0.01
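
Note (not part of the commit): the first script.py hunk header shows training_config = {, so this commit appears to move the configuration from the deleted YAML file into script.py itself. An inline Python equivalent, assuming the values simply mirror the deleted YAML, would be:

training_config = {
    # Dataset split
    "train_split": 0.9,
    "val_split": 0.1,
    # Training parameters unpacked into model.train(**train_params)
    "train_params": {
        "epochs": 1,
        "batch": 16,
        "imgsz": 640,
        "lr0": 0.01,
        "lrf": 0.01,
    },
}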