"""
Note: You don't need to modify this file as this script is used to train the model for the project.
All of your work should be done in the data_curation.py script.
You should import your main functions from the data_curation.py script and use them to prepare the dataset for training.
The approved model is `yolov10m` from Ulytralytics.
Your predictions must be in a label_field called "predictions" in the dataset.
See here for more details about hyperparameters for this model: https://docs.ultralytics.com/modes/train/#train-settings
"""
import os

import yaml
import fiftyone as fo
import fiftyone.utils.random as four
import fiftyone.utils.huggingface as fouh
from ultralytics import YOLO

from data_curation import prepare_dataset
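
# `prepare_dataset` is the participant-supplied curation entry point from
# data_curation.py (see the module docstring above). A minimal, purely
# illustrative sketch of the interface this script assumes:
#
#     def prepare_dataset():
#         dataset = ...  # load and curate a fiftyone Dataset
#         return dataset
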
def export_to_yolo_format(
    samples,
    classes,
    label_field="ground_truth",
    export_dir=".",
    splits=["train", "val"],
):
"""
Export samples to YOLO format, optionally handling multiple data splits.
Args:
samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
export_dir (str): The directory where the exported data will be saved.
classes (list): A list of class names for the YOLO format.
label_field (str, optional): The field in the samples that contains the labels.
Defaults to "ground_truth".
splits (str, list, optional): The split(s) to export. Can be a single split name (str)
or a list of split names. If None, all samples are exported as "val" split.
Defaults to None.
Returns:
None
"""
    if splits is None:
        splits = ["val"]
    elif isinstance(splits, str):
        splits = [splits]

    for split in splits:
        split_view = (
            samples
            if split == "val" and splits == ["val"]
            else samples.match_tags(split)
        )
        split_view.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field=label_field,
            classes=classes,
            split=split,
        )
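

# Note: exporting with fo.types.YOLOv5Dataset writes a `dataset.yaml` plus
# per-split `images/` and `labels/` folders into `export_dir`; train_model()
# below points Ultralytics at that `dataset.yaml`.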
def train_model():
    """
    Train the YOLO model on the given dataset using the provided configuration.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, "training_config.yaml")

    with open(config_path, "r") as file:
        training_config = yaml.safe_load(file)
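
    # Expected structure of training_config.yaml (illustrative values; this
    # script only assumes the keys 'train_split', 'val_split', and 'train_params',
    # where 'train_params' holds Ultralytics train settings such as epochs,
    # batch, and imgsz):
    #
    #     train_split: 0.9
    #     val_split: 0.1
    #     train_params:
    #       epochs: 50
    #       batch: 16
    #       imgsz: 640
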
    training_dataset = fouh.load_from_hub(
        "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set",
        max_samples=100,  # for testing; remove this later
    )

    print("Splitting the dataset...")
    four.random_split(
        training_dataset,
        {"train": training_config["train_split"], "val": training_config["val_split"]},
    )
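    # random_split tags each sample as "train" or "val"; export_to_yolo_format()
    # then selects samples per split via samples.match_tags(split).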
print("Dataset split completed.")
print("Exporting dataset to YOLO format...")
export_to_yolo_format(
samples=training_dataset,
classes=training_dataset.default_classes,
)
print("Dataset export completed.")
print("Initializing the YOLO model...")
model = YOLO("yolov10m.pt")
print("Model initialized.")
print("Starting model training...")
results = model.train(
data="dataset.yaml",
**training_config['train_params']
)
print("Model training completed.")
    best_model_path = str(results.save_dir / "weights" / "best.pt")
    print(f"Best model path: {best_model_path}")

    best_model = YOLO(best_model_path)
    print("Best model loaded.")


if __name__ == "__main__":
    train_model()