datascienceharp committed
Commit 47bee20 · 1 Parent(s): 7e64e15

Files changed (1):
  1. script.py +59 -7

script.py CHANGED
@@ -19,8 +19,63 @@ import fiftyone as fo
 import fiftyone.utils.random as four
 import fiftyone.utils.huggingface as fouh
 
-from data_curation import prepare_dataset
+#IMPLEMENT YOUR FUNCTIONS FOR DATA CURATION HERE, BELOW ARE JUST DUMMY FUNCTIONS AS EXAMPLES
+
+def shuffle_data(dataset):
+    """Shuffle the dataset"""
+    return dataset.shuffle(seed=51)
+
+def take_random_sample(dataset):
+    """Take a sample from the dataset"""
+    return dataset.take(size=10, seed=51)
+
+# DEFINE YOUR TRAINING HYPERPARAMETERS IN THIS DICTIONARY
+training_config = {
+    # Dataset split
+    "train_split": 0.9,
+    "val_split": 0.1,
+
+    # Training parameters
+    "train_params": {
+        "epochs": 1,
+        "batch": 16,
+        "imgsz": 640,
+        "lr0": 0.01,
+        "lrf": 0.01
+    }
+}
+
+
+# WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
+def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
+    """
+    Prepare the dataset for model training.
+
+    Args:
+        name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
+
+    Returns:
+        fiftyone.core.dataset.Dataset: The curated dataset.
+
+    Note:
+        The following code block MUST NOT be removed from your submission:
 
+        This ensures that only the approved dataset is used for the competition.
+    """
+
+    # DO NOT MODIFY THIS LINE
+    dataset = fouh.load_from_hub(name, split="train")
+
+    # WRAP YOUR DATA CURATION FUNCTIONS HERE
+    dataset = shuffle_data(dataset)
+    dataset = take_random_sample(dataset)
+
+    # DO NOT MODIFY BELOW THIS LINE
+    curated_dataset = dataset.clone(name="curated_dataset")
+
+    curated_dataset.persistent = True
+
+# DO NOT MODIFY THIS FUNCTION
 def export_to_yolo_format(
     samples,
     classes,
@@ -61,7 +116,8 @@ def export_to_yolo_format(
             split=split
         )
 
-def train_model():
+# DO NOT MODIFY THIS FUNCTION
+def train_model(training_config):
     """
     Train the YOLO model on the given dataset using the provided configuration.
     """
@@ -73,11 +129,7 @@ def train_model():
     with open(config_path, 'r') as file:
         training_config = yaml.safe_load(file)
 
-
-    training_dataset = fouh.load_from_hub(
-        "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set",
-        max_samples=100 #for testing remove this later
-    )
+    training_dataset = prepare_dataset()
 
     print("Splitting the dataset...")
     four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
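
For anyone adapting this template, below is a minimal sketch of how the dummy curation steps (shuffle_data / take_random_sample) could be swapped for a real curation pass while keeping the required structure of prepare_dataset. It assumes the samples carry a "ground_truth" detections field (that field name is not confirmed by this diff) and that prepare_dataset is meant to return the cloned curated_dataset; only standard FiftyOne view stages are used.

import fiftyone.utils.huggingface as fouh
from fiftyone import ViewField as F

def drop_unlabeled(dataset):
    """Keep only samples with at least one ground-truth detection.

    NOTE: the "ground_truth" field name is an assumption for illustration.
    """
    return dataset.match(F("ground_truth.detections").length() > 0)

def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
    # DO NOT MODIFY THIS LINE (per the template above)
    dataset = fouh.load_from_hub(name, split="train")

    # Custom curation replaces the dummy shuffle/take steps
    dataset = drop_unlabeled(dataset)

    # DO NOT MODIFY BELOW THIS LINE (per the template above)
    curated_dataset = dataset.clone(name="curated_dataset")
    curated_dataset.persistent = True

    # Assumed: the curated dataset is returned so train_model() can consume it
    return curated_dataset

With a curation function like this in place, the updated train_model(training_config) shown above picks up the curated data via training_dataset = prepare_dataset().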