datascienceharp commited on
Commit
a24e92a
·
1 Parent(s): 47bee20
Files changed (3) hide show
  1. data_curation.py +0 -62
  2. script.py +18 -5
  3. training_config.yaml +0 -11
data_curation.py DELETED
@@ -1,62 +0,0 @@
1
- """
2
- This script is used to curate the data for the project.
3
-
4
- Implement your functions to clean the data and prepare it for model training.
5
-
6
- Note: the competition requires that you use FiftyOne for data curation and you are only allowed to
7
- use the approved dataset from the hub, Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set, which can
8
- be found here: https://huggingface.co/datasets/Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set
9
- """
10
-
11
- import fiftyone as fo
12
- import fiftyone.utils.huggingface as fouh
13
-
14
- # Implement functions for data curation. below are just dummy functions as examples
15
-
16
- def shuffle_data(dataset):
17
- """Shuffle the dataset"""
18
- return dataset.shuffle(seed=51)
19
-
20
- def take_random_sample(dataset):
21
- """Take a sample from the dataset"""
22
- return dataset.take(size=10,seed=51)
23
-
24
- def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
25
- """
26
- Prepare the dataset for model training.
27
-
28
- Args:
29
- name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
30
-
31
- Returns:
32
- fiftyone.core.dataset.Dataset: The curated dataset.
33
-
34
- Raises:
35
- ValueError: If the provided dataset name is not the approved one.
36
-
37
- Note:
38
- The following code block MUST NOT be removed from your submission:
39
-
40
- APPROVED_DATASET = "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"
41
-
42
- if name != APPROVED_DATASET:
43
- raise ValueError(f"Only the approved dataset '{APPROVED_DATASET}' is allowed for this competition.")
44
-
45
- This ensures that only the approved dataset is used for the competition.
46
- """
47
- APPROVED_DATASET = "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"
48
-
49
- if name != APPROVED_DATASET:
50
- raise ValueError(f"Only the approved dataset '{APPROVED_DATASET}' is allowed for this competition.")
51
-
52
- # Load the approved dataset from the hub
53
- dataset = fouh.load_from_hub(name, split="train")
54
-
55
- # Implement your data curation functions here
56
- dataset = shuffle_data(dataset)
57
- dataset = take_random_sample(dataset)
58
-
59
- # Return the curated dataset
60
- curated_dataset = dataset.clone(name="curated_dataset")
61
-
62
- curated_dataset.persistent = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
script.py CHANGED
@@ -49,7 +49,9 @@ training_config = {
49
  # WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
50
  def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
51
  """
52
- Prepare the dataset for model training.
 
 
53
 
54
  Args:
55
  name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
@@ -86,6 +88,8 @@ def export_to_yolo_format(
86
  """
87
  Export samples to YOLO format, optionally handling multiple data splits.
88
 
 
 
89
  Args:
90
  samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
91
  export_dir (str): The directory where the exported data will be saved.
@@ -117,9 +121,11 @@ def export_to_yolo_format(
117
  )
118
 
119
  # DO NOT MODIFY THIS FUNCTION
120
- def train_model(training_config):
121
  """
122
  Train the YOLO model on the given dataset using the provided configuration.
 
 
123
  """
124
 
125
  script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -132,32 +138,39 @@ def train_model(training_config):
132
  training_dataset = prepare_dataset()
133
 
134
  print("Splitting the dataset...")
 
135
  four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
 
136
  print("Dataset split completed.")
137
 
138
  print("Exporting dataset to YOLO format...")
 
139
  export_to_yolo_format(
140
  samples=training_dataset,
141
  classes=training_dataset.default_classes,
142
  )
 
143
  print("Dataset export completed.")
144
 
145
  print("Initializing the YOLO model...")
 
146
  model = YOLO("yolov10m.pt")
 
147
  print("Model initialized.")
148
 
149
  print("Starting model training...")
 
150
  results = model.train(
151
  data="dataset.yaml",
152
  **training_config['train_params']
153
  )
 
154
  print("Model training completed.")
155
 
156
  best_model_path = str(results.save_dir / "weights/best.pt")
157
- print(f"Best model path: {best_model_path}")
158
- best_model = YOLO(best_model_path)
159
- print("Best model loaded.")
160
 
161
  print(f"Best model saved to: {best_model_path}")
 
 
162
  if __name__=="__main__":
163
  train_model()
 
49
  # WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
50
  def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
51
  """
52
+ Prepare the dataset for model training.
53
+
54
+ NOTE: There are lines you must not modify in this function. They are marked with "DO NOT MODIFY".
55
 
56
  Args:
57
  name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
 
88
  """
89
  Export samples to YOLO format, optionally handling multiple data splits.
90
 
91
+ NOTE: DO NOT MODIFY THIS FUNCTION.
92
+
93
  Args:
94
  samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
95
  export_dir (str): The directory where the exported data will be saved.
 
121
  )
122
 
123
  # DO NOT MODIFY THIS FUNCTION
124
+ def train_model(training_config=training_config):
125
  """
126
  Train the YOLO model on the given dataset using the provided configuration.
127
+
128
+ NOTE: DO NOT MODIFY THIS FUNCTION.
129
  """
130
 
131
  script_dir = os.path.dirname(os.path.abspath(__file__))
 
138
  training_dataset = prepare_dataset()
139
 
140
  print("Splitting the dataset...")
141
+
142
  four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
143
+
144
  print("Dataset split completed.")
145
 
146
  print("Exporting dataset to YOLO format...")
147
+
148
  export_to_yolo_format(
149
  samples=training_dataset,
150
  classes=training_dataset.default_classes,
151
  )
152
+
153
  print("Dataset export completed.")
154
 
155
  print("Initializing the YOLO model...")
156
+
157
  model = YOLO("yolov10m.pt")
158
+
159
  print("Model initialized.")
160
 
161
  print("Starting model training...")
162
+
163
  results = model.train(
164
  data="dataset.yaml",
165
  **training_config['train_params']
166
  )
167
+
168
  print("Model training completed.")
169
 
170
  best_model_path = str(results.save_dir / "weights/best.pt")
 
 
 
171
 
172
  print(f"Best model saved to: {best_model_path}")
173
+
174
+ # DO NOT MODIFY THE BELOW
175
  if __name__=="__main__":
176
  train_model()
training_config.yaml DELETED
@@ -1,11 +0,0 @@
1
- # Dataset split
2
- train_split: 0.9
3
- val_split: 0.1
4
-
5
- # Training parameters
6
- train_params:
7
- epochs: 1
8
- batch: 16
9
- imgsz: 640
10
- lr0: 0.01
11
- lrf: 0.01