{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir('../')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c:\\\\mlops project\\\\image-colorization-mlops'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "\n",
    "@dataclass(frozen=True)\n",
    "class DataIngestionConfig:\n",
    "    \"\"\"Immutable settings for the data-ingestion stage.\n",
    "\n",
    "    Instances are built by ``ConfigurationManager.get_data_ingestion_config``\n",
    "    from the ``data_ingestion`` section of the project config.\n",
    "    \"\"\"\n",
    "    # Working directory of the stage; created by the configuration manager.\n",
    "    root_dir: Path\n",
    "    # Path of the source archive that ``DataIngestion.load_file`` checks for.\n",
    "    source_dir: Path\n",
    "    # Archive opened by ``extract_zip_file``; ``load_file`` resets it to source_dir.\n",
    "    local_data_file: Path\n",
    "    # Directory the archive members are extracted into.\n",
    "    unzip_dir: Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): star import kept because cells outside this view may rely on it;\n",
    "# only CONFIG_FILE_PATH and PARAMS_FILE_PATH are used in this cell.\n",
    "from src.imagecolorization.constants import *\n",
    "from src.imagecolorization.utils.common import read_yaml, create_directories\n",
    "\n",
    "class ConfigurationManager:\n",
    "    \"\"\"Reads the project YAML files and builds per-stage config objects.\"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        config_filepath=CONFIG_FILE_PATH,\n",
    "        params_filepath=PARAMS_FILE_PATH,\n",
    "    ):\n",
    "        \"\"\"Load both YAML files and create the artifacts root directory.\n",
    "\n",
    "        Args:\n",
    "            config_filepath: Location of the main config YAML.\n",
    "            params_filepath: Location of the params YAML.\n",
    "        \"\"\"\n",
    "        self.config = read_yaml(config_filepath)\n",
    "        self.params = read_yaml(params_filepath)\n",
    "\n",
    "        create_directories([self.config.artifacts_root])\n",
    "\n",
    "    def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
    "        \"\"\"Build the DataIngestionConfig from the ``data_ingestion`` section.\n",
    "\n",
    "        Creates ``root_dir`` as a side effect.\n",
    "\n",
    "        Returns:\n",
    "            DataIngestionConfig: Frozen config whose fields are ``Path`` objects.\n",
    "        \"\"\"\n",
    "        section = self.config.data_ingestion\n",
    "\n",
    "        create_directories([section.root_dir])\n",
    "\n",
    "        # The dataclass annotates every field as Path, so convert the values\n",
    "        # read from the config (presumably plain strings - TODO confirm)\n",
    "        # instead of passing them through unchanged.\n",
    "        return DataIngestionConfig(\n",
    "            root_dir=Path(section.root_dir),\n",
    "            source_dir=Path(section.source_dir),\n",
    "            local_data_file=Path(section.local_data_file),\n",
    "            unzip_dir=Path(section.unzip_dir),\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import zipfile\n",
    "from src.imagecolorization.logging import logger\n",
    "from tqdm.notebook import tqdm\n",
    "from dataclasses import replace\n",
    "\n",
    "class DataIngestion:\n",
    "    \"\"\"Locates the source archive and extracts it into the unzip directory.\"\"\"\n",
    "\n",
    "    def __init__(self, config: DataIngestionConfig):\n",
    "        \"\"\"Store the ingestion settings used by the other methods.\"\"\"\n",
    "        self.config = config\n",
    "\n",
    "    def load_file(self):\n",
    "        \"\"\"Point ``local_data_file`` at the source archive if it exists.\n",
    "\n",
    "        Raises:\n",
    "            FileNotFoundError: If ``source_dir`` is not an existing file.\n",
    "        \"\"\"\n",
    "        source = self.config.source_dir\n",
    "\n",
    "        # isfile rather than exists: a directory at this path cannot be opened\n",
    "        # as a zip archive, so reject it here with a clear error.\n",
    "        if not os.path.isfile(source):\n",
    "            logger.error(f'File not found at {source}')\n",
    "            raise FileNotFoundError(f'No file found at {source}')\n",
    "\n",
    "        # The config dataclass is frozen, so swap in an updated copy.\n",
    "        self.config = replace(self.config, local_data_file=source)\n",
    "        logger.info(f'File Found at: {self.config.local_data_file}')\n",
    "\n",
    "    def extract_zip_file(self):\n",
    "        \"\"\"Extract every member of ``local_data_file`` into ``unzip_dir``.\n",
    "\n",
    "        The target directory is created when it does not exist yet.\n",
    "        \"\"\"\n",
    "        unzip_path = self.config.unzip_dir\n",
    "        os.makedirs(unzip_path, exist_ok=True)\n",
    "\n",
    "        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
    "            # Read the member list once; it feeds both the loop and the bar total.\n",
    "            members = zip_ref.infolist()\n",
    "\n",
    "            for member in tqdm(iterable=members, total=len(members), desc='Extracting files'):\n",
    "                zip_ref.extract(member=member, path=unzip_path)\n",
    "\n",
    "        logger.info(f'Extracted {self.config.local_data_file} to {unzip_path}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-08-18 02:08:07,443: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
      "[2024-08-18 02:08:07,444: INFO: common: yaml file: params.yaml loaded successfully]\n",
      "[2024-08-18 02:08:07,445: INFO: common: created directory at: artifacts]\n",
      "[2024-08-18 02:08:07,446: INFO: common: created directory at: artifacts/data_ingestion]\n",
      "[2024-08-18 02:08:07,446: INFO: 2749353352: File Found at: C:\\\\mlops project\\\\archive.zip]\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b91326c533ac4f588a5224910549cd65",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Extracting files: 0%| | 0/5 [00:00