File size: 4,366 Bytes
bdcc619
d650f60
bdcc619
 
 
 
 
 
47bee20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c084333
47bee20
a24e92a
 
 
47bee20
 
 
 
 
 
 
 
 
bdcc619
47bee20
 
 
 
c084333
47bee20
 
 
 
 
 
 
 
 
 
 
bdcc619
 
 
 
8b84a69
bdcc619
 
 
 
 
a24e92a
 
bdcc619
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47bee20
a24e92a
bdcc619
 
a24e92a
ffbdd76
bdcc619
a04535e
47bee20
e3c4fa8
 
a24e92a
bdcc619
a24e92a
e3c4fa8
bdcc619
e3c4fa8
a24e92a
bdcc619
 
 
 
a24e92a
e3c4fa8
bdcc619
e3c4fa8
a24e92a
ffbdd76
 
 
 
 
a24e92a
e3c4fa8
bdcc619
e3c4fa8
a24e92a
bdcc619
8b84a69
bdcc619
 
a24e92a
e3c4fa8
 
bdcc619
 
e3c4fa8
a24e92a
 
bdcc619
e977585
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import os

import yaml

import fiftyone as fo
import fiftyone.utils.random as four
import fiftyone.utils.huggingface as fouh

#IMPLEMENT YOUR FUNCTIONS FOR DATA CURATION HERE, BELOW ARE JUST DUMMY FUNCTIONS AS EXAMPLES

def shuffle_data(dataset):
    """Return *dataset* shuffled with a fixed seed (51) for reproducibility."""
    shuffled_view = dataset.shuffle(seed=51)
    return shuffled_view

def take_random_sample(dataset):
    """Return a random subset of 10 samples from *dataset* (fixed seed 51)."""
    sampled_view = dataset.take(size=10, seed=51)
    return sampled_view

# DEFINE YOUR TRAINING HYPERPARAMETERS IN THIS DICTIONARY
# Consumed by train_model(): the split fractions feed four.random_split(),
# and "train_params" is unpacked directly into model.train(**train_params).
training_config = {
    # Dataset split — fractions passed to fiftyone.utils.random.random_split;
    # they should sum to 1.0.
    "train_split": 0.9,
    "val_split": 0.1,

    # Training parameters forwarded verbatim to the YOLO trainer
    "train_params": {
        "epochs": 1,      # number of training epochs
        "batch": 16,      # batch size
        "imgsz": 640,     # input image size (pixels)
        "lr0": 0.01,      # initial learning rate
        "lrf": 0.01       # final learning rate factor
    }
}


# WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
def prepare_dataset():
    """
    Prepare the dataset for model training.

    Loads the competition dataset, applies the curation functions
    (shuffle_data, take_random_sample), and clones the result as a
    persistent dataset named "curated_dataset".

    NOTE: There are lines you must not modify in this function. They are
    marked with "DO NOT MODIFY". This ensures that only the approved
    dataset is used for the competition.

    Returns:
        fiftyone.core.dataset.Dataset: The curated, persistent dataset clone.
    """

    # DO NOT MODIFY THIS LINE
    dataset = fouh.load_from_hub("/tmp/data/train")

    # WRAP YOUR DATA CURATION FUNCTIONS HERE
    dataset = shuffle_data(dataset)
    dataset = take_random_sample(dataset)

    # DO NOT MODIFY BELOW THIS LINE
    curated_dataset = dataset.clone(name="curated_dataset")

    curated_dataset.persistent = True

    # Bug fix: the function previously fell off the end and returned None,
    # but train_model() assigns and uses its return value — return the clone.
    return curated_dataset

# DO NOT MODIFY THIS FUNCTION
def export_to_yolo_format(
    samples,
    classes,
    label_field="ground_truth",
    export_dir=".",
    # NOTE(review): mutable default argument — harmless here because the
    # function only rebinds `splits`, never mutates it, but a tuple or None
    # default would be the safer idiom. Left as-is (DO NOT MODIFY).
    splits=["train", "val"]
):
    """
    Export samples to YOLO format, optionally handling multiple data splits.

    NOTE: DO NOT MODIFY THIS FUNCTION.

    Args:
        samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
        classes (list): A list of class names for the YOLO format.
        label_field (str, optional): The field in the samples that contains the labels.
            Defaults to "ground_truth".
        export_dir (str, optional): The directory where the exported data will be saved.
            Defaults to ".".
        splits (str, list, optional): The split(s) to export. Can be a single split name (str)
            or a list of split names. If None, all samples are exported as the "val" split.
            Defaults to ["train", "val"].

    Returns:
        None

    """
    # Normalize `splits` to a list of split names.
    if splits is None:
        splits = ["val"]
    elif isinstance(splits, str):
        splits = [splits]

    for split in splits:
        # Special case: exporting only "val" takes the whole collection;
        # otherwise select the samples tagged with this split name.
        split_view = samples if split == "val" and splits == ["val"] else samples.match_tags(split)

        # Each split is written into export_dir in YOLOv5 directory layout.
        split_view.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field=label_field,
            classes=classes,
            split=split
        )

# DO NOT MODIFY THIS FUNCTION
def train_model(training_config=training_config):
    """
    Train the YOLO model on the given dataset using the provided configuration.

    Pipeline: curate (prepare_dataset) -> split (four.random_split) ->
    export (export_to_yolo_format, YOLOv5 layout) -> train (model.train).

    NOTE: DO NOT MODIFY THIS FUNCTION AT ALL OR YOUR SCRIPT WILL FAIL.

    Args:
        training_config (dict): Hyperparameters; must contain "train_split",
            "val_split", and "train_params" keys (see module-level default).
    """

    # prepare_dataset() must return the curated FiftyOne dataset; everything
    # below operates on that return value.
    training_dataset = prepare_dataset()

    print("Splitting the dataset...")

    # Tags each sample "train" or "val" according to the configured fractions.
    four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
    
    print("Dataset split completed.")

    print("Exporting dataset to YOLO format...")

    # Writes both splits into the current directory in YOLOv5 layout,
    # producing the dataset.yaml consumed by model.train() below.
    export_to_yolo_format(
        samples=training_dataset,
        classes=training_dataset.default_classes,
    )

    print("Dataset export completed.")

    print("Initializing the YOLO model...")

    # NOTE(review): `YOLO` is not imported anywhere in the visible portion of
    # this file — confirm `from ultralytics import YOLO` exists elsewhere, or
    # this line raises NameError at runtime.
    #DO NOT MODIFY THIS LINE
    model = YOLO(
        model="/tmp/data/yolo11m.pt",
        
    )
    
    print("Model initialized.")

    print("Starting model training...")

    # "train_params" is unpacked verbatim into the trainer (epochs, batch, ...).
    results = model.train(
        data="dataset.yaml",
        **training_config['train_params']
    )

    print("Model training completed.")

    # results.save_dir is a path-like object; best checkpoint lives under weights/.
    best_model_path = str(results.save_dir / "weights/best.pt")

    print(f"Best model saved to: {best_model_path}")

# DO NOT MODIFY THE BELOW
# Script entry point: runs the full curate -> split -> export -> train pipeline.
if __name__=="__main__":
    train_model()