File size: 4,366 Bytes
bdcc619
d650f60
bdcc619
 
 
 
 
 
47bee20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c084333
47bee20
a24e92a
 
 
47bee20
 
 
 
 
 
 
 
 
bdcc619
47bee20
 
 
 
c084333
47bee20
 
 
 
 
 
 
 
 
 
 
bdcc619
 
 
 
8b84a69
bdcc619
 
 
 
 
a24e92a
 
bdcc619
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47bee20
a24e92a
bdcc619
 
a24e92a
ffbdd76
bdcc619
a04535e
47bee20
e3c4fa8
 
a24e92a
bdcc619
a24e92a
e3c4fa8
bdcc619
e3c4fa8
a24e92a
bdcc619
 
 
 
a24e92a
e3c4fa8
bdcc619
e3c4fa8
a24e92a
ffbdd76
 
 
 
 
a24e92a
e3c4fa8
bdcc619
e3c4fa8
a24e92a
bdcc619
8b84a69
bdcc619
 
a24e92a
e3c4fa8
 
bdcc619
 
e3c4fa8
a24e92a
 
bdcc619
e977585
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import os

import yaml

import fiftyone as fo
import fiftyone.utils.random as four
import fiftyone.utils.huggingface as fouh

#IMPLEMENT YOUR FUNCTIONS FOR DATA CURATION HERE, BELOW ARE JUST DUMMY FUNCTIONS AS EXAMPLES

def shuffle_data(dataset):
    """Return *dataset* shuffled with a fixed seed (51) for reproducibility."""
    shuffled_view = dataset.shuffle(seed=51)
    return shuffled_view

def take_random_sample(dataset):
    """Return a random subset of 10 samples from *dataset* (fixed seed 51)."""
    sampled_view = dataset.take(size=10, seed=51)
    return sampled_view

# DEFINE YOUR TRAINING HYPERPARAMETERS IN THIS DICTIONARY
# Consumed by train_model(): the split fractions feed four.random_split(),
# and "train_params" is unpacked directly into model.train(**train_params).
training_config = {
    # Dataset split — fractions passed to fiftyone.utils.random.random_split;
    # they should sum to 1.0.
    "train_split": 0.9,
    "val_split": 0.1,

    # Training parameters forwarded verbatim to the YOLO trainer
    "train_params": {
        "epochs": 1,      # number of training epochs
        "batch": 16,      # batch size
        "imgsz": 640,     # input image size (pixels)
        "lr0": 0.01,      # initial learning rate
        "lrf": 0.01       # final learning rate factor
    }
}


# WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
def prepare_dataset():
    """
    Prepare the dataset for model training.

    Loads the competition dataset, applies the curation functions
    (shuffle_data, take_random_sample), and clones the result as a
    persistent dataset named "curated_dataset".

    NOTE: There are lines you must not modify in this function. They are
    marked with "DO NOT MODIFY". This ensures that only the approved
    dataset is used for the competition.

    Returns:
        fiftyone.core.dataset.Dataset: The curated, persistent dataset clone.
    """

    # DO NOT MODIFY THIS LINE
    dataset = fouh.load_from_hub("/tmp/data/train")

    # WRAP YOUR DATA CURATION FUNCTIONS HERE
    dataset = shuffle_data(dataset)
    dataset = take_random_sample(dataset)

    # DO NOT MODIFY BELOW THIS LINE
    curated_dataset = dataset.clone(name="curated_dataset")

    curated_dataset.persistent = True

    # Bug fix: the function previously fell off the end and returned None,
    # but train_model() assigns and uses its return value — return the clone.
    return curated_dataset

# DO NOT MODIFY THIS FUNCTION
def export_to_yolo_format(
    samples,
    classes,
    label_field="ground_truth",
    export_dir=".",
    # NOTE(review): mutable default argument — harmless here because the
    # function only rebinds `splits`, never mutates it, but a tuple or None
    # default would be the safer idiom. Left as-is (DO NOT MODIFY).
    splits=["train", "val"]
):
    """
    Export samples to YOLO format, optionally handling multiple data splits.

    NOTE: DO NOT MODIFY THIS FUNCTION.

    Args:
        samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
        classes (list): A list of class names for the YOLO format.
        label_field (str, optional): The field in the samples that contains the labels.
            Defaults to "ground_truth".
        export_dir (str, optional): The directory where the exported data will be saved.
            Defaults to ".".
        splits (str, list, optional): The split(s) to export. Can be a single split name (str)
            or a list of split names. If None, all samples are exported as the "val" split.
            Defaults to ["train", "val"].

    Returns:
        None

    """
    # Normalize `splits` to a list of split names.
    if splits is None:
        splits = ["val"]
    elif isinstance(splits, str):
        splits = [splits]

    for split in splits:
        # Special case: exporting only "val" takes the whole collection;
        # otherwise select the samples tagged with this split name.
        split_view = samples if split == "val" and splits == ["val"] else samples.match_tags(split)

        # Each split is written into export_dir in YOLOv5 directory layout.
        split_view.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field=label_field,
            classes=classes,
            split=split
        )

# DO NOT MODIFY THIS FUNCTION
def train_model(training_config=training_config):
    """
    Train the YOLO model on the given dataset using the provided configuration.

    Pipeline: curate (prepare_dataset) -> split (four.random_split) ->
    export (export_to_yolo_format, YOLOv5 layout) -> train (model.train).

    NOTE: DO NOT MODIFY THIS FUNCTION AT ALL OR YOUR SCRIPT WILL FAIL.

    Args:
        training_config (dict): Hyperparameters; must contain "train_split",
            "val_split", and "train_params" keys (see module-level default).
    """

    # prepare_dataset() must return the curated FiftyOne dataset; everything
    # below operates on that return value.
    training_dataset = prepare_dataset()

    print("Splitting the dataset...")

    # Tags each sample "train" or "val" according to the configured fractions.
    four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
    
    print("Dataset split completed.")

    print("Exporting dataset to YOLO format...")

    # Writes both splits into the current directory in YOLOv5 layout,
    # producing the dataset.yaml consumed by model.train() below.
    export_to_yolo_format(
        samples=training_dataset,
        classes=training_dataset.default_classes,
    )

    print("Dataset export completed.")

    print("Initializing the YOLO model...")

    # NOTE(review): `YOLO` is not imported anywhere in the visible portion of
    # this file — confirm `from ultralytics import YOLO` exists elsewhere, or
    # this line raises NameError at runtime.
    #DO NOT MODIFY THIS LINE
    model = YOLO(
        model="/tmp/data/yolo11m.pt",
        
    )
    
    print("Model initialized.")

    print("Starting model training...")

    # "train_params" is unpacked verbatim into the trainer (epochs, batch, ...).
    results = model.train(
        data="dataset.yaml",
        **training_config['train_params']
    )

    print("Model training completed.")

    # results.save_dir is a path-like object; best checkpoint lives under weights/.
    best_model_path = str(results.save_dir / "weights/best.pt")

    print(f"Best model saved to: {best_model_path}")

# DO NOT MODIFY THE BELOW
# Script entry point: runs the full curate -> split -> export -> train pipeline.
if __name__=="__main__":
    train_model()