datascienceharp commited on
Commit
a24e92a
·
1 Parent(s): 47bee20
Files changed (3) hide show
  1. data_curation.py +0 -62
  2. script.py +18 -5
  3. training_config.yaml +0 -11
data_curation.py DELETED
@@ -1,62 +0,0 @@
1
- """
2
- This script is used to curate the data for the project.
3
-
4
- Implement your functions to clean the data and prepare it for model training.
5
-
6
- Note: the competition requires that you use FiftyOne for data curation and you are only allowed to
7
- use the approved dataset from the hub, Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set, which can
8
- be found here: https://huggingface.co/datasets/Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set
9
- """
10
-
11
- import fiftyone as fo
12
- import fiftyone.utils.huggingface as fouh
13
-
14
- # Implement functions for data curation. below are just dummy functions as examples
15
-
16
- def shuffle_data(dataset):
17
- """Shuffle the dataset"""
18
- return dataset.shuffle(seed=51)
19
-
20
- def take_random_sample(dataset):
21
- """Take a sample from the dataset"""
22
- return dataset.take(size=10,seed=51)
23
-
24
- def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
25
- """
26
- Prepare the dataset for model training.
27
-
28
- Args:
29
- name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
30
-
31
- Returns:
32
- fiftyone.core.dataset.Dataset: The curated dataset.
33
-
34
- Raises:
35
- ValueError: If the provided dataset name is not the approved one.
36
-
37
- Note:
38
- The following code block MUST NOT be removed from your submission:
39
-
40
- APPROVED_DATASET = "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"
41
-
42
- if name != APPROVED_DATASET:
43
- raise ValueError(f"Only the approved dataset '{APPROVED_DATASET}' is allowed for this competition.")
44
-
45
- This ensures that only the approved dataset is used for the competition.
46
- """
47
- APPROVED_DATASET = "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"
48
-
49
- if name != APPROVED_DATASET:
50
- raise ValueError(f"Only the approved dataset '{APPROVED_DATASET}' is allowed for this competition.")
51
-
52
- # Load the approved dataset from the hub
53
- dataset = fouh.load_from_hub(name, split="train")
54
-
55
- # Implement your data curation functions here
56
- dataset = shuffle_data(dataset)
57
- dataset = take_random_sample(dataset)
58
-
59
- # Return the curated dataset
60
- curated_dataset = dataset.clone(name="curated_dataset")
61
-
62
- curated_dataset.persistent = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
script.py CHANGED
@@ -49,7 +49,9 @@ training_config = {
49
  # WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
50
  def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
51
  """
52
- Prepare the dataset for model training.
 
 
53
 
54
  Args:
55
  name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
@@ -86,6 +88,8 @@ def export_to_yolo_format(
86
  """
87
  Export samples to YOLO format, optionally handling multiple data splits.
88
 
 
 
89
  Args:
90
  samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
91
  export_dir (str): The directory where the exported data will be saved.
@@ -117,9 +121,11 @@ def export_to_yolo_format(
117
  )
118
 
119
  # DO NOT MODIFY THIS FUNCTION
120
- def train_model(training_config):
121
  """
122
  Train the YOLO model on the given dataset using the provided configuration.
 
 
123
  """
124
 
125
  script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -132,32 +138,39 @@ def train_model(training_config):
132
  training_dataset = prepare_dataset()
133
 
134
  print("Splitting the dataset...")
 
135
  four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
 
136
  print("Dataset split completed.")
137
 
138
  print("Exporting dataset to YOLO format...")
 
139
  export_to_yolo_format(
140
  samples=training_dataset,
141
  classes=training_dataset.default_classes,
142
  )
 
143
  print("Dataset export completed.")
144
 
145
  print("Initializing the YOLO model...")
 
146
  model = YOLO("yolov10m.pt")
 
147
  print("Model initialized.")
148
 
149
  print("Starting model training...")
 
150
  results = model.train(
151
  data="dataset.yaml",
152
  **training_config['train_params']
153
  )
 
154
  print("Model training completed.")
155
 
156
  best_model_path = str(results.save_dir / "weights/best.pt")
157
- print(f"Best model path: {best_model_path}")
158
- best_model = YOLO(best_model_path)
159
- print("Best model loaded.")
160
 
161
  print(f"Best model saved to: {best_model_path}")
 
 
162
  if __name__=="__main__":
163
  train_model()
 
49
  # WRAP YOUR DATASET CURATION FUNCTIONS IN THIS FUNCTION
50
  def prepare_dataset(name="Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set"):
51
  """
52
+ Prepare the dataset for model training.
53
+
54
+ NOTE: There are lines you must not modify in this function. They are marked with "DO NOT MODIFY".
55
 
56
  Args:
57
  name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".
 
88
  """
89
  Export samples to YOLO format, optionally handling multiple data splits.
90
 
91
+ NOTE: DO NOT MODIFY THIS FUNCTION.
92
+
93
  Args:
94
  samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
95
  export_dir (str): The directory where the exported data will be saved.
 
121
  )
122
 
123
  # DO NOT MODIFY THIS FUNCTION
124
+ def train_model(training_config=training_config):
125
  """
126
  Train the YOLO model on the given dataset using the provided configuration.
127
+
128
+ NOTE: DO NOT MODIFY THIS FUNCTION.
129
  """
130
 
131
  script_dir = os.path.dirname(os.path.abspath(__file__))
 
138
  training_dataset = prepare_dataset()
139
 
140
  print("Splitting the dataset...")
141
+
142
  four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})
143
+
144
  print("Dataset split completed.")
145
 
146
  print("Exporting dataset to YOLO format...")
147
+
148
  export_to_yolo_format(
149
  samples=training_dataset,
150
  classes=training_dataset.default_classes,
151
  )
152
+
153
  print("Dataset export completed.")
154
 
155
  print("Initializing the YOLO model...")
156
+
157
  model = YOLO("yolov10m.pt")
158
+
159
  print("Model initialized.")
160
 
161
  print("Starting model training...")
162
+
163
  results = model.train(
164
  data="dataset.yaml",
165
  **training_config['train_params']
166
  )
167
+
168
  print("Model training completed.")
169
 
170
  best_model_path = str(results.save_dir / "weights/best.pt")
 
 
 
171
 
172
  print(f"Best model saved to: {best_model_path}")
173
+
174
+ # DO NOT MODIFY THE BELOW
175
  if __name__=="__main__":
176
  train_model()
training_config.yaml DELETED
@@ -1,11 +0,0 @@
1
- # Dataset split
2
- train_split: 0.9
3
- val_split: 0.1
4
-
5
- # Training parameters
6
- train_params:
7
- epochs: 1
8
- batch: 16
9
- imgsz: 640
10
- lr0: 0.01
11
- lrf: 0.01