{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# NOTE: this chdir is relative, so re-running the cell on a live kernel moves up one more directory.\n",
    "os.chdir('../')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# entity\n",
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "\n",
    "\n",
    "@dataclass(frozen=True)\n",
    "class DataIngestionConfig:\n",
    "    \"\"\"Immutable settings consumed by the data-ingestion stage.\"\"\"\n",
    "\n",
    "    root_dir: Path  # passed to create_directories() when the config is built\n",
    "    source_URL: str  # link the dataset archive is downloaded from\n",
    "    local_data_file: Path  # destination of the downloaded zip archive\n",
    "    unzip_dir: Path  # directory the archive is extracted into"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cnnClassifier.utils.common import read_yaml, create_directories\n",
    "from cnnClassifier.constant import CONFIG_FILE_PATH, PARAMS_FILE_PATH\n",
    "\n",
    "\n",
    "# Configuration\n",
    "class ConfigurationManager:\n",
    "    \"\"\"Reads the config and params YAML files and builds per-stage config objects.\"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        config_filepath=CONFIG_FILE_PATH,\n",
    "        params_filepath=PARAMS_FILE_PATH,\n",
    "    ):\n",
    "        self.config = read_yaml(config_filepath)\n",
    "        self.params = read_yaml(params_filepath)\n",
    "\n",
    "        # NOTE(review): 'atifacts_root' looks like a typo for 'artifacts_root'. It has to match\n",
    "        # the key used in the config file, so confirm there before renaming it.\n",
    "        create_directories([self.config.atifacts_root])\n",
    "\n",
    "    def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
    "        \"\"\"Build a DataIngestionConfig from the ``data_ingestion`` section of the config.\n",
    "\n",
    "        The section's ``root_dir`` is passed to ``create_directories`` first.\n",
    "        \"\"\"\n",
    "        config = self.config.data_ingestion\n",
    "        create_directories([config.root_dir])\n",
    "\n",
    "        return DataIngestionConfig(\n",
    "            root_dir=config.root_dir,\n",
    "            source_URL=config.source_URL,\n",
    "            local_data_file=config.local_data_file,\n",
    "            unzip_dir=config.unzip_dir,\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# components\n",
    "\n",
    "import os\n",
    "import zipfile\n",
    "import gdown\n",
    "from cnnClassifier import logger\n",
    "from cnnClassifier.utils.common import get_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DataIngestion:\n",
    "    \"\"\"Downloads the dataset archive and extracts it, driven by a DataIngestionConfig.\"\"\"\n",
    "\n",
    "    def __init__(self, config: DataIngestionConfig):\n",
    "        self.config = config\n",
    "\n",
    "    def download_file(self) -> str:\n",
    "        \"\"\"Download the archive at ``config.source_URL`` to ``config.local_data_file``.\n",
    "\n",
    "        Returns:\n",
    "            The local path the archive was requested to be written to, as a string.\n",
    "\n",
    "        Raises:\n",
    "            Any exception raised while preparing directories or downloading propagates unchanged.\n",
    "        \"\"\"\n",
    "        dataset_url = self.config.source_URL\n",
    "        zip_download_dir = self.config.local_data_file\n",
    "        os.makedirs('artifacts/datasets', exist_ok=True)\n",
    "        # Also make sure the folder that will actually hold the archive exists.\n",
    "        os.makedirs(os.path.dirname(zip_download_dir) or '.', exist_ok=True)\n",
    "        logger.info(f'Downloading data from {dataset_url} into {zip_download_dir}')\n",
    "\n",
    "        # Takes the second-to-last '/'-separated segment as the file id; this presumably expects\n",
    "        # a share link shaped like .../file/d/FILE_ID/view - TODO confirm against the config.\n",
    "        file_id = dataset_url.split('/')[-2]\n",
    "        # NOTE(review): the '?/export' in this prefix looks unusual - confirm it is intended.\n",
    "        prefix = 'https://drive.google.com/uc?/export=download&id='\n",
    "        gdown.download(prefix + file_id, zip_download_dir)\n",
    "        logger.info(f'Downloaded data from {dataset_url} into {zip_download_dir}')\n",
    "\n",
    "        return str(zip_download_dir)\n",
    "\n",
    "    def extract_zip_file(self) -> None:\n",
    "        \"\"\"Extract ``config.local_data_file`` into ``config.unzip_dir``.\n",
    "\n",
    "        The target directory is created first if it does not exist.\n",
    "        Function returns None.\n",
    "        \"\"\"\n",
    "        unzip_path = self.config.unzip_dir\n",
    "        os.makedirs(unzip_path, exist_ok=True)\n",
    "        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
    "            zip_ref.extractall(unzip_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 5%|▍ | 2.62M/57.7M [00:30<03:49, 240kB/s]"
     ]
    }
   ],
   "source": [
    "config = ConfigurationManager()\n",
    "data_ingestion_config = config.get_data_ingestion_config()\n",
    "data_ingestion = DataIngestion(config=data_ingestion_config)\n",
    "data_ingestion.download_file()\n",
    "data_ingestion.extract_zip_file()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}