datascienceharp committed
Commit 47bee20 · 1 Parent(s): 7e64e15

Files changed (1):
  1. script.py +59 -7

script.py CHANGED
@@ -19,8 +19,63 @@ import fiftyone as fo
 import fiftyone.utils.random as four
 import fiftyone.utils.huggingface as fouh
 
-from data_curation import prepare_dataset
+#IMPLEMENT YOUR FUNCTIONS FOR DATA CURATION HERE, BELOW ARE JUST DUMMY FUNCTIONS AS EXAMPLES
+
+def shuffle_data(dataset):
+    """Shuffle the dataset"""
+    return dataset.shuffle(seed=51)
+
+def take_random_sample(dataset):
+    """Take a sample from the dataset"""
+    return dataset.take(size=10, seed=51)
+
+# DEFINE YOUR TRAINING HYPERPARAMETERS IN THIS DICTIONARY
+training_config = {
+    # Dataset split
+    "train_split": 0.9,
+    "val_split": 0.1,
+
+    # Training parameters
+    "train_params": {
+        "epochs": 1,
+        "batch": 16,
+        "imgsz": 640,
+        "lr0": 0.01,
+        "lrf": 0.01
+    }
+}
+
+
+# WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
+def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
+    """
+    Prepare the dataset for model training.
+
+    Args:
+        name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
+
+    Returns:
+        fiftyone.core.dataset.Dataset: The curated dataset.
+
+    Note:
+        The following code block MUST NOT be removed from your submission:
 
+        This ensures that only the approved dataset is used for the competition.
+    """
+
+    # DO NOT MODIFY THIS LINE
+    dataset = fouh.load_from_hub(name, split="train")
+
+    # WRAP YOUR DATA CURATION FUNCTIONS HERE
+    dataset = shuffle_data(dataset)
+    dataset = take_random_sample(dataset)
+
+    # DO NOT MODIFY BELOW THIS LINE
+    curated_dataset = dataset.clone(name="curated_dataset")
+
+    curated_dataset.persistent = True
+
+# DO NOT MODIFY THIS FUNCTION
 def export_to_yolo_format(
     samples,
     classes,
@@ -61,7 +116,8 @@ def export_to_yolo_format(
             split=split
         )
 
-def train_model():
+# DO NOT MODIFY THIS FUNCTION
+def train_model(training_config):
     """
     Train the YOLO model on the given dataset using the provided configuration.
     """
@@ -73,11 +129,7 @@ def train_model():
     with open(config_path, 'r') as file:
         training_config = yaml.safe_load(file)
 
-
-    training_dataset = fouh.load_from_hub(
-        "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set",
-        max_samples=100 #for testing remove this later
-    )
+    training_dataset = prepare_dataset()
 
     print("Splitting the dataset...")
     four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
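
For anyone adapting this template, below is a minimal sketch of how the dummy curation steps (shuffle_data / take_random_sample) could be swapped for a real curation pass while keeping the required structure of prepare_dataset. It assumes the samples carry a "ground_truth" detections field (that field name is not confirmed by this diff) and that prepare_dataset is meant to return the cloned curated_dataset; only standard FiftyOne view stages are used.

import fiftyone.utils.huggingface as fouh
from fiftyone import ViewField as F

def drop_unlabeled(dataset):
    """Keep only samples with at least one ground-truth detection.

    NOTE: the "ground_truth" field name is an assumption for illustration.
    """
    return dataset.match(F("ground_truth.detections").length() > 0)

def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
    # DO NOT MODIFY THIS LINE (per the template above)
    dataset = fouh.load_from_hub(name, split="train")

    # Custom curation replaces the dummy shuffle/take steps
    dataset = drop_unlabeled(dataset)

    # DO NOT MODIFY BELOW THIS LINE (per the template above)
    curated_dataset = dataset.clone(name="curated_dataset")
    curated_dataset.persistent = True

    # Assumed: the curated dataset is returned so train_model() can consume it
    return curated_dataset

With a curation function like this in place, the updated train_model(training_config) shown above picks up the curated data via training_dataset = prepare_dataset().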