"""
Note: You don't need to modify this file as this script is used to train the model for the project.
All of your work should be done in the data_curation.py script.
You should import your main functions from the data_curation.py script and use them to prepare the dataset for training.
The approved model is `yolov10m` from Ulytralytics.
Your predictions must be in a label_field called "predictions" in the dataset.
See here for more details about hyperparameters for this model: https://docs.ultralytics.com/modes/train/#train-settings
"""
import os

import yaml
import fiftyone as fo
import fiftyone.utils.random as four
import fiftyone.utils.huggingface as fouh
from ultralytics import YOLO

from data_curation import prepare_dataset
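
# `prepare_dataset` is the participant-supplied curation entry point from
# data_curation.py (see the module docstring above). A minimal, purely
# illustrative sketch of the interface this script assumes:
#
#     def prepare_dataset():
#         dataset = ...  # load and curate a fiftyone Dataset
#         return dataset
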
def export_to_yolo_format(
    samples,
    classes,
    label_field="ground_truth",
    export_dir=".",
    splits=["train", "val"],
):
"""
Export samples to YOLO format, optionally handling multiple data splits.
Args:
samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
export_dir (str): The directory where the exported data will be saved.
classes (list): A list of class names for the YOLO format.
label_field (str, optional): The field in the samples that contains the labels.
Defaults to "ground_truth".
splits (str, list, optional): The split(s) to export. Can be a single split name (str)
or a list of split names. If None, all samples are exported as "val" split.
Defaults to None.
Returns:
None
"""
    if splits is None:
        splits = ["val"]
    elif isinstance(splits, str):
        splits = [splits]

    for split in splits:
        split_view = (
            samples
            if split == "val" and splits == ["val"]
            else samples.match_tags(split)
        )
        split_view.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field=label_field,
            classes=classes,
            split=split,
        )
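

# Note: exporting with fo.types.YOLOv5Dataset writes a `dataset.yaml` plus
# per-split `images/` and `labels/` folders into `export_dir`; train_model()
# below points Ultralytics at that `dataset.yaml`.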
def train_model():
    """
    Train the YOLO model on the given dataset using the provided configuration.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, "training_config.yaml")

    with open(config_path, "r") as file:
        training_config = yaml.safe_load(file)
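
    # Expected structure of training_config.yaml (illustrative values; this
    # script only assumes the keys 'train_split', 'val_split', and 'train_params',
    # where 'train_params' holds Ultralytics train settings such as epochs,
    # batch, and imgsz):
    #
    #     train_split: 0.9
    #     val_split: 0.1
    #     train_params:
    #       epochs: 50
    #       batch: 16
    #       imgsz: 640
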
    training_dataset = fouh.load_from_hub(
        "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set",
        max_samples=100,  # for testing; remove this later
    )

    print("Splitting the dataset...")
    four.random_split(
        training_dataset,
        {"train": training_config["train_split"], "val": training_config["val_split"]},
    )
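    # random_split tags each sample as "train" or "val"; export_to_yolo_format()
    # then selects samples per split via samples.match_tags(split).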
print("Dataset split completed.")
print("Exporting dataset to YOLO format...")
export_to_yolo_format(
samples=training_dataset,
classes=training_dataset.default_classes,
)
print("Dataset export completed.")
print("Initializing the YOLO model...")
model = YOLO("yolov10m.pt")
print("Model initialized.")
print("Starting model training...")
results = model.train(
data="dataset.yaml",
**training_config['train_params']
)
print("Model training completed.")
    best_model_path = str(results.save_dir / "weights" / "best.pt")
    print(f"Best model path: {best_model_path}")

    best_model = YOLO(best_model_path)
    print("Best model loaded.")


if __name__ == "__main__":
    train_model()