diff --git a/.gitignore b/.gitignore
index d7c9832ff3c60eeb0b764a3b8d40fa4dad2cfee5..3caf1cb325691fa745eaf8be605822d0b8fa7443 100644
--- a/.gitignore
+++ b/.gitignore
@@ -87,3 +87,5 @@ target/
# Mypy cache
.mypy_cache/
+
+/models
\ No newline at end of file
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 54969b4a2e78e3a1b01f190d0f9519e3e346986a..0000000000000000000000000000000000000000
--- a/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3
-
diff --git a/README.md b/README.md
index fb2178f99baadbf4a0d1b2355db42ba940933c77..0e0b331c063ccdf81fb3c5ee7676bb1de6f14331 100644
--- a/README.md
+++ b/README.md
@@ -1,59 +1,76 @@
-Image Colorization
+## Image Colorization
==============================
-A short description of the project.
+A deep learning based image colorization project.
-Project Organization
-------------
+## FINDINGS
+- the task we want to learn is `image-colorization`, but we can accomplish it through different kinds of tasks, which I call **sub-tasks**; in our context these could be `regression based image colorization`, `classification (by binning) based colorization`, `GAN based colorization`, or `image colorization + scene classification` (the "Let there be Color!" paper did this).
+- based on this analysis, and while trying to come up with a project file structure, I realized that the data, model, loss, metrics and dataloader are all tightly coupled when dealing with the overall task (`image-colorization`), but once we pick a **sub-task** we have much more freedom.
+- within a sub-task (e.g., regression-unet-learner) the set of rules is already fixed, so we can try different models without changing the data, or swap datasets while using the same model, **so it is important to fix the sub-task we want to do first.**
+- making a folder for each sub-task therefore seems right, as a sub-task has high cohesion and no coupling with any other sub-task.
+
+## RULES
+- use **lower_snake_case** for **functions**
+- use **lower_snake_case** for **file & folder names**
+- use **UpperCamelCase** for **class names**
+- **sub-task** name should be in **lower-kebab-case**
+
+## Project File Structure
+ .
├── LICENSE
- ├── Makefile <- Makefile with commands like `make data` or `make train`
├── README.md <- The top-level README for developers using this project.
- ├── data
- │ ├── external <- Data from third party sources.
- │ ├── interim <- Intermediate data that has been transformed.
- │ ├── processed <- The final, canonical data sets for modeling.
- │ └── raw <- The original, immutable data dump.
- │
- ├── docs <- A default Sphinx project; see sphinx-doc.org for details
- │
- ├── models <- Trained and serialized models, model predictions, or model summaries
- │
- ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
- │ the creator's initials, and a short `-` delimited description, e.g.
- │ `1.0-jqp-initial-data-exploration`.
- │
- ├── references <- Data dictionaries, manuals, and all other explanatory materials.
- │
- ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
- │ └── figures <- Generated graphics and figures to be used in reporting
- │
- ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
- │ generated with `pip freeze > requirements.txt`
- │
- ├── setup.py <- makes project pip installable (pip install -e .) so src can be imported
- ├── src <- Source code for use in this project.
- │ ├── __init__.py <- Makes src a Python module
- │ │
- │ ├── data <- Scripts to download or generate data
- │ │ └── make_dataset.py
- │ │
- │ ├── features <- Scripts to turn raw data into features for modeling
- │ │ └── build_features.py
- │ │
- │ ├── models <- Scripts to train models and then use trained models to make
- │ │ │ predictions
- │ │ ├── predict_model.py
- │ │ └── train_model.py
- │ │
- │ └── visualization <- Scripts to create exploratory and results oriented visualizations
- │ └── visualize.py
- │
- └── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
-
-
+ ├── data/
+ │ ├── external <- Data from third party sources.
+ │ ├── interim <- Intermediate data that has been transformed.
+ │ ├── processed <- The final, canonical data sets for modeling.
+ │ └── raw <- The original, immutable data dump.
+ ├── models/ <- Trained models
+ ├── notebooks/ <- Jupyter notebooks
+ ├── configs/
+ │ ├── experiment1.yaml
+ │ ├── experiment2.yaml
+ │ ├── experiment3.yaml
+ │ └── ...
+ └── src/
+ ├── sub_task_1/
+ │ ├── validate_config.py
+ │ ├── data/
+ │ │ ├── register_datasets.py
+ │ │ ├── datasets/
+ │ │ │ ├── dataset1.py
+ │ │ │ └── dataset2.py
+ │ ├── model/
+ │ │ ├── base_model_interface.py
+ │ │ ├── register_models.py
+ │ │ ├── models/
+ │ │ │ ├── simple_model.py
+ │ │ │ └── complex_model.py
+ │ │ ├── losses.py
+ │ │ ├── metrics.py
+ │ │ ├── callbacks.py
+        │   │   └── dataloaders.py
+ │ └── scripts/
+ │ ├── create_dataset.py
+ │ └── create_model.py
+ ├── sub_task_2/
+ │ └── ...
+ ├── sub_task_3/
+ │ └── ...
+ ├── scripts/
+ │ ├── create_sub_task.py
+ │ ├── prepare_dataset.py
+ │ ├── visualize_dataset.py
+ │ ├── visualize_results.py
+ │ ├── train.py
+ │ ├── evaluate.py
+ │ └── inference.py
+ └── utils/
+ ├── data_utils.py
+ └── model_utils.py
--------
+
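+## Usage (sketch)
+A minimal sketch of the intended workflow, assuming the scripts are run as modules from the repository root; the sub-task name below is only an example:
+
+```bash
+# scaffold a new sub-task with blueprint data/model/scripts files (example name)
+python -m src.scripts.create_sub_task gan_colorization
+
+# download and prepare the dataset described by an experiment config
+python -m src.scripts.prepare_dataset configs/experiment1.yaml
+
+# visualize a sample of the prepared dataset
+python -m src.scripts.visualize_dataset configs/experiment1.yaml
+```
+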
Project based on the cookiecutter data science project template . #cookiecutterdatascience
diff --git a/command.py b/command.py
deleted file mode 100644
index b568fc212fc731d28436043e827dbf8117f598e1..0000000000000000000000000000000000000000
--- a/command.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import argparse
-import sys
-import os
-
-# parser = argparse.ArgumentParser()
-# parser.add_argument("category")
-# parser.add_argument("subcommand-args")
-# args = parser.parse_args()
-args = sys.argv
-
-# remove "command.py"
-args = args[1:]
-
-# print(args)
-subcommand = args[0].lower()
-
-subcommand_args = " ".join(args[1:])
-if subcommand=="data":
- command = "py src/data/make_dataset.py "+subcommand_args
- # print(command)
- os.system(command)
-else:
- print("subcommand not supported.")
-
-# os.system("py src/__init__.py")
-"""
-download the dataset: data download
-preprocess dataset: data prepare
-visualize dataset: data show
-delete raw & interim dataset dir: data delete --cache
-delete all dataset dir: data delete --all
-
-
-train model: model train
-evaluate model: model evaluate
-inference with model: model predict --image test.jpg --folder images/ -d results/
-
-
-
-"""
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
deleted file mode 100644
index 633985a8efc83dee5902f41a2fcc345e7f2cfe46..0000000000000000000000000000000000000000
--- a/config.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-raw_dataset_dir: data/raw/
-interim_dataset_dir: data/interim/
-processed_dataset_dir: data/processed/
-
-# forests or pascal-voc
-dataset: forests
-
-image_size: 224
-train_size: 0.8
-shuffle: False
-batch_size: 16
-
-seed: 324
diff --git a/configs/experiment1.yaml b/configs/experiment1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b1c70c36f43b6aafe2e03d11d7b6aca2087ab01
--- /dev/null
+++ b/configs/experiment1.yaml
@@ -0,0 +1,14 @@
+# mandatory
+task: simple_regression_colorization
+dataset: forests
+model: model_v1
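+# `task` must match a sub-task folder under src/ and `dataset` a module in
+# src/<task>/data/datasets/ (e.g. src/simple_regression_colorization/data/datasets/forests.py)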
+
+# common parameters
+seed: 324
+train_size: 0.8
+image_size: 224
+shuffle: False
+
+# training related
+batch_size: 16
+epochs: 10
\ No newline at end of file
diff --git a/constants.yaml b/constants.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d7d65c69bee5b814782a069585705b5a075a5ced
--- /dev/null
+++ b/constants.yaml
@@ -0,0 +1,3 @@
+RAW_DATASET_DIR: data/raw/
+INTERIM_DATASET_DIR: data/interim/
+PROCESSED_DATASET_DIR: data/processed/
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 56a5c29417acdeb28cb9946998c5d9ff47ff32b9..0000000000000000000000000000000000000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,153 +0,0 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = _build
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
-
-help:
- @echo "Please use \`make ' where is one of"
- @echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " singlehtml to make a single large HTML file"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " devhelp to make HTML files and a Devhelp project"
- @echo " epub to make an epub"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " latexpdf to make LaTeX files and run them through pdflatex"
- @echo " text to make text files"
- @echo " man to make manual pages"
- @echo " texinfo to make Texinfo files"
- @echo " info to make Texinfo files and run them through makeinfo"
- @echo " gettext to make PO message catalogs"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
-
-clean:
- -rm -rf $(BUILDDIR)/*
-
-html:
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-singlehtml:
- $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
- @echo
- @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-pickle:
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-json:
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/project_name.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/project_name.qhc"
-
-devhelp:
- $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
- @echo
- @echo "Build finished."
- @echo "To view the help file:"
- @echo "# mkdir -p $$HOME/.local/share/devhelp/project_name"
- @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/project_name"
- @echo "# devhelp"
-
-epub:
- $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
- @echo
- @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-latex:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make' in that directory to run these through (pdf)latex" \
- "(use \`make latexpdf' here to do that automatically)."
-
-latexpdf:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through pdflatex..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-text:
- $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
- @echo
- @echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-man:
- $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
- @echo
- @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-texinfo:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo
- @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
- @echo "Run \`make' in that directory to run these through makeinfo" \
- "(use \`make info' here to do that automatically)."
-
-info:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo "Running Texinfo files through makeinfo..."
- make -C $(BUILDDIR)/texinfo info
- @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-gettext:
- $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
- @echo
- @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-changes:
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
diff --git a/docs/commands.rst b/docs/commands.rst
deleted file mode 100644
index 2d162f36724fe7e2f1d1e8cedbe8af567dcdd21d..0000000000000000000000000000000000000000
--- a/docs/commands.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-Commands
-========
-
-The Makefile contains the central entry points for common tasks related to this project.
-
-Syncing data to S3
-^^^^^^^^^^^^^^^^^^
-
-* `make sync_data_to_s3` will use `aws s3 sync` to recursively sync files in `data/` up to `s3://[OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')/data/`.
-* `make sync_data_from_s3` will use `aws s3 sync` to recursively sync files from `s3://[OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')/data/` to `data/`.
diff --git a/docs/conf.py b/docs/conf.py
deleted file mode 100644
index 87b240c0bf39489ab3e4715a03f5fc6ec70b76a3..0000000000000000000000000000000000000000
--- a/docs/conf.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# project_name documentation build configuration file, created by
-# sphinx-quickstart.
-#
-# This file is execfile()d with the current directory set to its containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-import os
-import sys
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-# sys.path.insert(0, os.path.abspath('.'))
-
-# -- General configuration -----------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be extensions
-# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = []
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix of source filenames.
-source_suffix = '.rst'
-
-# The encoding of source files.
-# source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'project_name'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = '0.1'
-# The full version, including alpha/beta/rc tags.
-release = '0.1'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-# language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-# today = ''
-# Else, today_fmt is used as the format for a strftime call.
-# today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-exclude_patterns = ['_build']
-
-# The reST default role (used for this markup: `text`) to use for all documents.
-# default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-# add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-# add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-# show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-# modindex_common_prefix = []
-
-
-# -- Options for HTML output ---------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-html_theme = 'default'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-# html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
-
-# The name for this set of Sphinx documents. If None, it defaults to
-# " v documentation".
-# html_title = None
-
-# A shorter title for the navigation bar. Default is the same as html_title.
-# html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-# html_logo = None
-
-# The name of an image file (within the static path) to use as favicon of the
-# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-# html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
-# using the given strftime format.
-# html_last_updated_fmt = '%b %d, %Y'
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-# html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-# html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-# html_additional_pages = {}
-
-# If false, no module index is generated.
-# html_domain_indices = True
-
-# If false, no index is generated.
-# html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-# html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-# html_show_sourcelink = True
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-# html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-# html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a tag referring to it. The value of this option must be the
-# base URL from which the finished HTML is served.
-# html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-# html_file_suffix = None
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'project_namedoc'
-
-
-# -- Options for LaTeX output --------------------------------------------------
-
-latex_elements = {
- # The paper size ('letterpaper' or 'a4paper').
- # 'papersize': 'letterpaper',
-
- # The font size ('10pt', '11pt' or '12pt').
- # 'pointsize': '10pt',
-
- # Additional stuff for the LaTeX preamble.
- # 'preamble': '',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass [howto/manual]).
-latex_documents = [
- ('index',
- 'project_name.tex',
- u'project_name Documentation',
- u"Your name (or your organization/company/team)", 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-# latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-# latex_use_parts = False
-
-# If true, show page references after internal links.
-# latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-# latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-# latex_appendices = []
-
-# If false, no module index is generated.
-# latex_domain_indices = True
-
-
-# -- Options for manual page output --------------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
- ('index', 'project_name', u'project_name Documentation',
- [u"Your name (or your organization/company/team)"], 1)
-]
-
-# If true, show URL addresses after external links.
-# man_show_urls = False
-
-
-# -- Options for Texinfo output ------------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-# dir menu entry, description, category)
-texinfo_documents = [
- ('index', 'project_name', u'project_name Documentation',
- u"Your name (or your organization/company/team)", 'project_name',
- 'A short description of the project.', 'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-# texinfo_appendices = []
-
-# If false, no module index is generated.
-# texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-# texinfo_show_urls = 'footnote'
diff --git a/docs/getting-started.rst b/docs/getting-started.rst
deleted file mode 100644
index b4f71c3a293b7c30dbb94afd6f3e58997b55ceef..0000000000000000000000000000000000000000
--- a/docs/getting-started.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Getting started
-===============
-
-This is where you describe how to get set up on a clean install, including the
-commands necessary to get the raw data (using the `sync_data_from_s3` command,
-for example), and then how to make the cleaned, final data sets.
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index 3302c62623ca64b043f90de2f47b19cd7eea331c..0000000000000000000000000000000000000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,24 +0,0 @@
-.. project_name documentation master file, created by
- sphinx-quickstart.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
-project_name documentation!
-==============================================
-
-Contents:
-
-.. toctree::
- :maxdepth: 2
-
- getting-started
- commands
-
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index b9cc86d370d9590434d52f710a8bb3022a59cded..0000000000000000000000000000000000000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,190 +0,0 @@
-@ECHO OFF
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
-set BUILDDIR=_build
-set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
-set I18NSPHINXOPTS=%SPHINXOPTS% .
-if NOT "%PAPER%" == "" (
- set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
- set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
-)
-
-if "%1" == "" goto help
-
-if "%1" == "help" (
- :help
- echo.Please use `make ^` where ^ is one of
- echo. html to make standalone HTML files
- echo. dirhtml to make HTML files named index.html in directories
- echo. singlehtml to make a single large HTML file
- echo. pickle to make pickle files
- echo. json to make JSON files
- echo. htmlhelp to make HTML files and a HTML help project
- echo. qthelp to make HTML files and a qthelp project
- echo. devhelp to make HTML files and a Devhelp project
- echo. epub to make an epub
- echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
- echo. text to make text files
- echo. man to make manual pages
- echo. texinfo to make Texinfo files
- echo. gettext to make PO message catalogs
- echo. changes to make an overview over all changed/added/deprecated items
- echo. linkcheck to check all external links for integrity
- echo. doctest to run all doctests embedded in the documentation if enabled
- goto end
-)
-
-if "%1" == "clean" (
- for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
- del /q /s %BUILDDIR%\*
- goto end
-)
-
-if "%1" == "html" (
- %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/html.
- goto end
-)
-
-if "%1" == "dirhtml" (
- %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
- goto end
-)
-
-if "%1" == "singlehtml" (
- %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
- goto end
-)
-
-if "%1" == "pickle" (
- %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can process the pickle files.
- goto end
-)
-
-if "%1" == "json" (
- %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can process the JSON files.
- goto end
-)
-
-if "%1" == "htmlhelp" (
- %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can run HTML Help Workshop with the ^
-.hhp project file in %BUILDDIR%/htmlhelp.
- goto end
-)
-
-if "%1" == "qthelp" (
- %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can run "qcollectiongenerator" with the ^
-.qhcp project file in %BUILDDIR%/qthelp, like this:
- echo.^> qcollectiongenerator %BUILDDIR%\qthelp\project_name.qhcp
- echo.To view the help file:
- echo.^> assistant -collectionFile %BUILDDIR%\qthelp\project_name.ghc
- goto end
-)
-
-if "%1" == "devhelp" (
- %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished.
- goto end
-)
-
-if "%1" == "epub" (
- %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The epub file is in %BUILDDIR%/epub.
- goto end
-)
-
-if "%1" == "latex" (
- %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
- goto end
-)
-
-if "%1" == "text" (
- %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The text files are in %BUILDDIR%/text.
- goto end
-)
-
-if "%1" == "man" (
- %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The manual pages are in %BUILDDIR%/man.
- goto end
-)
-
-if "%1" == "texinfo" (
- %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
- goto end
-)
-
-if "%1" == "gettext" (
- %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
- goto end
-)
-
-if "%1" == "changes" (
- %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
- if errorlevel 1 exit /b 1
- echo.
- echo.The overview file is in %BUILDDIR%/changes.
- goto end
-)
-
-if "%1" == "linkcheck" (
- %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
- if errorlevel 1 exit /b 1
- echo.
- echo.Link check complete; look for any errors in the above output ^
-or in %BUILDDIR%/linkcheck/output.txt.
- goto end
-)
-
-if "%1" == "doctest" (
- %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
- if errorlevel 1 exit /b 1
- echo.
- echo.Testing of doctests in the sources finished, look at the ^
-results in %BUILDDIR%/doctest/output.txt.
- goto end
-)
-
-:end
diff --git a/references/.gitkeep b/references/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/reports/.gitkeep b/reports/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/reports/figures/.gitkeep b/reports/figures/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/requirements.txt b/requirements.txt
index 849abad5e1555ae4cb1457bb9eb9c1c7f0641bf6..db3b7e0af11ab246b2d034fb4b1094cb2d27ba42 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
huggingface_hub
comet_ml
-scikit-image
\ No newline at end of file
+scikit-image
+cerberus
\ No newline at end of file
diff --git a/setup.py b/setup.py
index dc3d4c9ad190853fa951d1a4d4e740710c16f8a5..baa65f16809763b4b7b73086bf9332075a04ee03 100644
--- a/setup.py
+++ b/setup.py
@@ -7,4 +7,4 @@ setup(
description='A short description of the project.',
author='Your name (or your organization/company/team)',
license='MIT',
-)
+)
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
deleted file mode 100644
index 40fa3304c2dd1342325631540f96ed10c955b97d..0000000000000000000000000000000000000000
--- a/src/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from src.utils import Config
-from pathlib import Path
-
-config = Config("config.yaml")
-# config.raw_dataset_dir = Path(config.raw_dataset_dir)
-# config.interim_dataset_dir = Path(config.interim_dataset_dir)
-# config.processed_dataset_dir = Path(config.processed_dataset_dir)
-
-# print(config)
\ No newline at end of file
diff --git a/src/data/.gitkeep b/src/data/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/data/__init__.py b/src/data/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/data/make_dataset.py b/src/data/make_dataset.py
deleted file mode 100644
index 26e61f2203200d52a4edf42c724021a4ded86405..0000000000000000000000000000000000000000
--- a/src/data/make_dataset.py
+++ /dev/null
@@ -1,128 +0,0 @@
-from huggingface_hub import snapshot_download
-import os,sys;sys.path.append(os.getcwd())
-from src import config
-from src.utils import *
-import argparse
-from pathlib import Path
-from zipfile import ZipFile
-from glob import glob
-import cv2
-import numpy as np
-import matplotlib.pyplot as plt
-from tqdm import tqdm
-import shutil
-from src.data.visualize_dataset import visualize_dataset
-
-def download_dataset():
- """Used to download dataset from hugging face
- """
- print_title(f"Downloading {config.dataset} dataset from hugging face")
- snapshot_download(repo_id="Anuj-Panthri/Image-Colorization-Datasets",
- repo_type="dataset",
- local_dir=config.raw_dataset_dir,
- allow_patterns=f"{config.dataset}/*")
-
-
-def unzip_dataset():
- print_title(f"Unzipping dataset")
- print("Extracting to :",Path(config.interim_dataset_dir)/Path("trainval/"))
- with ZipFile(Path(config.raw_dataset_dir)/Path(f"{config.dataset}/trainval.zip"),"r") as zip:
- zip.extractall(Path(config.interim_dataset_dir)/Path("trainval/"))
-
- print("Extracting to :",Path(config.interim_dataset_dir)/Path("test/"))
- with ZipFile(Path(config.raw_dataset_dir)/Path(f"{config.dataset}/test.zip"),"r") as zip:
- zip.extractall(Path(config.interim_dataset_dir)/Path("test/"))
-
-
-def clean_dataset():
- print_title("CLEANING DATASET")
- trainval_dir = Path(config.interim_dataset_dir) / Path("trainval/")
- test_dir = Path(config.interim_dataset_dir) / Path("test/")
-
- trainval_paths = glob(str(trainval_dir/Path("*")))
- test_paths = glob(str(test_dir/Path("*")))
-
- print("train,test: ",len(trainval_paths),",",len(test_paths),sep="")
-
-
- def clean(image_paths,destination_dir):
- if os.path.exists(destination_dir): shutil.rmtree(destination_dir)
- os.makedirs(destination_dir)
- for i in tqdm(range(len(image_paths))):
- img = cv2.imread(image_paths[i])
- img = cv2.resize(img,[128,128])
- if not is_bw(img):
- shutil.copy(trainval_paths[i],
- destination_dir)
- print("saved to:",destination_dir)
-
- destination_dir = Path(config.processed_dataset_dir)/Path("trainval/")
- clean(trainval_paths,destination_dir)
-
- destination_dir = Path(config.processed_dataset_dir)/Path("test/")
- clean(test_paths,destination_dir)
-
- trainval_dir = Path(config.processed_dataset_dir) / Path("trainval/")
- test_dir = Path(config.processed_dataset_dir) / Path("test/")
-
- trainval_paths = glob(str(trainval_dir/Path("*")))
- test_paths = glob(str(test_dir/Path("*")))
-
- print("after cleaning train,test: ",len(trainval_paths),",",len(test_paths),sep="")
-
-
-def prepare_dataset():
- print_title(f"Preparing dataset")
- download_dataset()
- unzip_dataset()
- clean_dataset()
-
-def delete_cache():
- ## clean old interim and raw datasets
- print_title("deleting unused raw and interim dataset dirs")
- if os.path.exists(config.raw_dataset_dir):
- shutil.rmtree(config.raw_dataset_dir)
- if os.path.exists(config.interim_dataset_dir):
- shutil.rmtree(config.interim_dataset_dir)
-
-def delete_all():
- ## clean all datasets
- print_title("deleting all dataset dirs")
- if os.path.exists(config.raw_dataset_dir):
- shutil.rmtree(config.raw_dataset_dir)
- if os.path.exists(config.interim_dataset_dir):
- shutil.rmtree(config.interim_dataset_dir)
- if os.path.exists(config.processed_dataset_dir):
- shutil.rmtree(config.processed_dataset_dir)
-
-
-if __name__=="__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("command")
- parser.add_argument("-d","--dataset",default="forests")
- parser.add_argument("--cache",action="store_true",default=True)
- parser.add_argument("--all",action="store_true")
-
- """
- prepare dataset: data prepare
- visualize dataset: data show
- delete raw & interim dataset dir: data delete --cache
- delete all dataset dir: data delete --all
- """
-
- args = parser.parse_args()
- # print(args)
-
- if args.command=="prepare":
- prepare_dataset()
-
- elif args.command=="show":
- visualize_dataset()
-
- elif args.command=="delete":
- if(args.all): delete_all()
- elif(args.cache): delete_cache()
-
- else:
- print("unsupported")
-
diff --git a/src/data/visualize_dataset.py b/src/data/visualize_dataset.py
deleted file mode 100644
index 45e36228ee9340f5bb296db8dc5028370d023901..0000000000000000000000000000000000000000
--- a/src/data/visualize_dataset.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import os,sys;sys.path.append(os.getcwd())
-from src.data.load_dataset import get_ds,get_datasets
-from src import config
-from src.utils import *
-import matplotlib.pyplot as plt
-import cv2
-import math
-
-def see_batch(L_batch,AB_batch,show_L=False,cols=4,row_size=5,col_size=5,title=None):
- n = L_batch.shape[0]
- rows = math.ceil(n/cols)
- fig = plt.figure(figsize=(col_size*cols,row_size*rows))
- if title:
- plt.title(title)
- plt.axis("off")
-
- for i in range(n):
- fig.add_subplot(rows,cols,i+1)
- L,AB = L_batch[i],AB_batch[i]
- L,AB = rescale_L(L), rescale_AB(AB)
-# print(L.shape,AB.shape)
- img = np.concatenate([L,AB],axis=-1)
- img = cv2.cvtColor(img,cv2.COLOR_LAB2RGB)*255
-# print(img.min(),img.max())
- if show_L:
- L = np.tile(L,(1,1,3))/100*255
- img = np.concatenate([L,img],axis=1)
- plt.imshow(img.astype("uint8"))
- plt.show()
-
-
-def visualize_dataset():
- train_ds,val_ds,test_ds = get_datasets()
- L_batch,AB_batch = next(iter(train_ds))
- L_batch,AB_batch = L_batch.numpy(), AB_batch.numpy()
- see_batch(L_batch,
- AB_batch,
- title="training dataset")
-
- L_batch,AB_batch = next(iter(val_ds))
- L_batch,AB_batch = L_batch.numpy(), AB_batch.numpy()
- see_batch(L_batch,
- AB_batch,
- title="validation dataset")
-
- L_batch,AB_batch = next(iter(test_ds))
- L_batch,AB_batch = L_batch.numpy(), AB_batch.numpy()
- see_batch(L_batch,
- AB_batch,
- title="testing dataset")
-
-
diff --git a/src/features/.gitkeep b/src/features/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/features/__init__.py b/src/features/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/features/build_features.py b/src/features/build_features.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/models/.gitkeep b/src/models/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/models/__init__.py b/src/models/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/models/predict_model.py b/src/models/predict_model.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/models/train_model.py b/src/models/train_model.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/scripts/create_sub_task.py b/src/scripts/create_sub_task.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f85de1bad5688a467493f05175cb977b5086240
--- /dev/null
+++ b/src/scripts/create_sub_task.py
@@ -0,0 +1,274 @@
+import os,shutil
+import argparse
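+
+# Scaffolds a new sub-task folder under src/<sub_task>/ with blueprint files:
+#   validate_config.py, data/ (register_datasets.py + a starter dataset),
+#   model/ (base_model_interface.py, register_models.py, models/, losses,
+#   metrics, callbacks, dataloaders) and scripts/ (create_dataset.py, create_model.py).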
+
+def create_file(file_path,file_content):
+ with open(file_path,"w") as f:
+ f.write(file_content)
+
+def create_data(data_dir,dataset_name,sub_task_dir):
+ # call src/sub_task/scripts/create_dataset.py dataset_name
+ os.system(f"python {sub_task_dir}/scripts/create_dataset.py {dataset_name}")
+
+ register_datasets_file_path = os.path.join(data_dir,"register_datasets.py")
+ create_file(register_datasets_file_path,
+f"""# register your datasets here
+
+datasets = ["{dataset_name}"]
+
+""")
+
+
+
+def create_model(model_dir:str, model_name:str, sub_task_dir:str):
+ base_model_interface_path = os.path.join(model_dir,"base_model_interface.py")
+
+ create_file(base_model_interface_path,
+"""import numpy as np
+from abc import ABC, abstractmethod
+
+# BaseModel Abstract class
+# all the models within this sub_task must inherit this class
+
+class BaseModel(ABC):
+ @abstractmethod
+ def train(self):
+ pass
+
+ @abstractmethod
+ def predict(self,inputs):
+ pass
+""")
+
+
+ # call src/sub_task/scripts/create_model.py model_name
+ os.system(f"python {sub_task_dir}/scripts/create_model.py {model_name}")
+
+
+ register_models_path = os.path.join(model_dir,"register_models.py")
+ create_file(register_models_path,
+f"""# register models of this sub_task here
+models = ["{model_name}"]
+""")
+
+
+
+ losses_path = os.path.join(model_dir,"losses.py")
+ create_file(losses_path,
+"""# define loss functions here
+""")
+
+ metrics_path = os.path.join(model_dir,"metrics.py")
+ create_file(metrics_path,
+"""# define metrics here
+""")
+
+ callbacks_path = os.path.join(model_dir,"callbacks.py")
+ create_file(callbacks_path,
+"""# define callbacks here
+""")
+
+ dataloaders_path = os.path.join(model_dir,"dataloaders.py")
+ create_file(dataloaders_path,
+"""# define dataloaders here
+""")
+
+
+def create_scripts(scripts_dir,sub_task):
+ create_dataset_path = os.path.join(scripts_dir,"create_dataset.py")
+ create_file(create_dataset_path,
+f"""import os,shutil
+import argparse
+
+def create_file(file_path,file_content):
+ with open(file_path,"w") as f:
+ f.write(file_content)
+
+def create_dataset(args):
+ dataset_name = args.name
+ force_flag = args.force
+ datasets_dir = os.path.join('src','{sub_task}','data','datasets')
+
+ os.makedirs(datasets_dir,exist_ok=True)
+ dataset_path = os.path.join(datasets_dir,dataset_name+".py")
+
+    # delete the old dataset if the force flag is set and it already exists
+ if os.path.exists(dataset_path):
+ if force_flag:
+ print("Replacing existing dataset:",dataset_name)
+            os.remove(dataset_path)
+ else:
+ print(f"{{dataset_name}} already exists, use --force flag if you want to reset it to default")
+ exit()
+
+
+ create_file(dataset_path,
+\"\"\"# write the dataset download and preparation code in this file
+# Note: download_prepare() is a specially chosen name, so don't change this function's name
+# you can add, remove and change any other function in this file
+
+def download_prepare():
+ \\"\\"\\" function used to download dataset and apply
+    all the data preprocessing required to prepare the dataset
+ \\"\\"\\"
+ download_dataset()
+ unzip_dataset()
+ clean_dataset()
+ move_dataset()
+
+
+def download_dataset():
+ \\"\\"\\"download dataset\\"\\"\\"
+ pass
+
+def unzip_dataset():
+ \\"\\"\\"unzip dataset(if required)\\"\\"\\"
+ pass
+
+def clean_dataset():
+ \\"\\"\\"clean dataset(if required)\\"\\"\\"
+ pass
+
+def move_dataset():
+ \\"\\"\\"move dataset to processed folder\\"\\"\\"
+ pass
+\"\"\")
+
+def main():
+ parser = argparse.ArgumentParser(description="Create blueprint dataset")
+ parser.add_argument('name',type=str,help="name of dataset (e.g., pascal-voc)")
+ parser.add_argument("--force",action="store_true",help="forcefully replace old existing dataset to default",default=False)
+ args = parser.parse_args()
+ create_dataset(args)
+
+if __name__=="__main__":
+ main()
+
+""")
+
+ create_model_path = os.path.join(scripts_dir,"create_model.py")
+ create_file(create_model_path,
+f"""import os,shutil
+import argparse
+
+def create_file(file_path,file_content):
+ with open(file_path,"w") as f:
+ f.write(file_content)
+
+def create_model(args):
+ model_name = args.name
+ force_flag = args.force
+ models_dir = os.path.join('src','{sub_task}','model',"models")
+ os.makedirs(models_dir,exist_ok=True)
+ model_path = os.path.join(models_dir,model_name+".py")
+
+    # delete the old model if the force flag is set and it already exists
+ if os.path.exists(model_path):
+ if force_flag:
+ print("Replacing existing model:",model_name)
+            os.remove(model_path)
+ else:
+ print(f"{{model_name}} already exists, use --force flag if you want to reset it to default")
+ exit()
+
+
+ model_name_camel_case = "".join([part.capitalize() for part in model_name.split("_")])
+ create_file(model_path,
+f\"\"\"from src.{sub_task}.model.base_model_interface import BaseModel
+
+class Model(BaseModel):
+ def train(self):
+ pass
+
+ def predict(self,inputs):
+ pass
+\"\"\")
+
+def main():
+ parser = argparse.ArgumentParser(description="Create blueprint model")
+ parser.add_argument('name',type=str,help="name of model (e.g., model_v2)")
+ parser.add_argument("--force",action="store_true",help="forcefully replace old existing model to default",default=False)
+ args = parser.parse_args()
+ create_model(args)
+
+if __name__=="__main__":
+ main()
+
+""")
+
+
+
+def create_sub_task(args):
+ """Used to create sub_task within our main task"""
+ sub_task = args.sub_task
+ force_flag = args.force
+ dataset_name = "dataset1"
+ model_name = "model1"
+
+ sub_task_dir = os.path.join('src',sub_task)
+ data_dir = os.path.join(sub_task_dir,'data')
+ model_dir = os.path.join(sub_task_dir,'model')
+ scripts_dir = os.path.join(sub_task_dir,"scripts")
+ # print(scripts_dir)
+    # delete the old sub_task if the force flag is set and it already exists
+ if os.path.exists(sub_task_dir):
+ if force_flag:
+ print("Replacing existing sub_task:",sub_task)
+ shutil.rmtree(sub_task_dir)
+ else:
+ print(f"{sub_task} already exists, use --force flag if you want to reset it to default")
+ exit()
+
+ # create empty folders
+ os.makedirs(sub_task_dir,exist_ok=True)
+ os.makedirs(data_dir,exist_ok=True)
+ os.makedirs(model_dir,exist_ok=True)
+ os.makedirs(scripts_dir,exist_ok=True)
+
+ # make config validator file
+ validate_config_file_path = os.path.join(sub_task_dir,"validate_config.py")
+ create_file(validate_config_file_path,
+'''# from cerberus import Validator
+
+# write config file schema here
+# based on cerberus Validator
+
+schema = {
+ "seed": {
+ "type": "integer",
+ },
+ "image_size": {"type": "integer", "required": True},
+ "train_size": {"type": "float", "required": True},
+ "shuffle": {"type": "boolean", "required": True},
+ "batch_size": {
+ "type": "integer",
+ "required": True,
+ },
+ "epochs": {
+ "type": "integer",
+ "required": True,
+ },
+}
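+
+# usage sketch (assuming validation is done with a cerberus Validator):
+#   v = Validator(schema)
+#   v.validate(config_dict)  # returns True/False, details end up in v.errors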
+
+''')
+
+ # make scripts files
+ create_scripts(scripts_dir,sub_task)
+
+ # make data files
+ create_data(data_dir,dataset_name,sub_task_dir)
+
+ # make model files
+ create_model(model_dir,model_name,sub_task_dir)
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Create blueprint sub_task")
+ parser.add_argument('sub_task',type=str,help="sub_task of project (e.g., simple_regression_colorization)")
+ parser.add_argument("--force",action="store_true",help="forcefully replace old existing sub_task to default",default=False)
+ args = parser.parse_args()
+
+ create_sub_task(args)
+
+if __name__=="__main__":
+ main()
+
\ No newline at end of file
diff --git a/src/scripts/prepare_dataset.py b/src/scripts/prepare_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8e1ffc7ea7110d66a5bf94d11321ab616b14ddf
--- /dev/null
+++ b/src/scripts/prepare_dataset.py
@@ -0,0 +1,31 @@
+import argparse
+from src.utils.config_loader import Config
+from src.utils import config_loader
+from src.utils.script_utils import validate_config
+import importlib
+
+
+def prepare_dataset(args):
+ config_file_path = args.config_file
+ config = Config(config_file_path)
+
+ # validate config
+ validate_config(config)
+
+ # set config globally
+ config_loader.config = config
+
+ # now prepare the dataset
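+    # dynamically resolve src/<task>/data/datasets/<dataset>.py and call its download_prepare()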
+ download_prepare = importlib.import_module(f"src.{config.task}.data.datasets.{config.dataset}").download_prepare
+ print("Preparing dataset")
+ download_prepare()
+ print("Prepared dataset")
+
+def main():
+ parser = argparse.ArgumentParser(description="Prepare dataset based on config yaml file")
+ parser.add_argument("config_file",type=str)
+ args = parser.parse_args()
+ prepare_dataset(args)
+
+if __name__=="__main__":
+ main()
\ No newline at end of file
diff --git a/src/scripts/visualize_dataset.py b/src/scripts/visualize_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..2179ce45684a47dc1a212f140b2f3701a43b90d7
--- /dev/null
+++ b/src/scripts/visualize_dataset.py
@@ -0,0 +1,29 @@
+import argparse
+from src.utils.config_loader import Config
+from src.utils import config_loader
+from src.utils.script_utils import validate_config
+import importlib
+
+
+def visualize_dataset(args):
+ config_file_path = args.config_file
+ config = Config(config_file_path)
+
+ # validate config
+ validate_config(config)
+
+ # set config globally
+ config_loader.config = config
+
+ # now visualize the dataset
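+    # dynamically resolve src/<task>/data/visualize_dataset.py and call its visualize()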
+ visualize_fn = importlib.import_module(f"src.{config.task}.data.visualize_dataset").visualize
+ visualize_fn()
+
+def main():
+    parser = argparse.ArgumentParser(description="Visualize dataset based on config yaml file")
+ parser.add_argument("config_file",type=str)
+ args = parser.parse_args()
+ visualize_dataset(args)
+
+if __name__=="__main__":
+ main()
\ No newline at end of file
diff --git a/src/simple_regression_colorization/data/datasets/forests.py b/src/simple_regression_colorization/data/datasets/forests.py
new file mode 100644
index 0000000000000000000000000000000000000000..1adf3736a9e31b94c5c98d2520e79a4b1e03ae7f
--- /dev/null
+++ b/src/simple_regression_colorization/data/datasets/forests.py
@@ -0,0 +1,79 @@
+from src.utils.data_utils import download_personal_hf_dataset,unzip_file,is_bw,print_title
+from zipfile import ZipFile
+from pathlib import Path
+from src.utils.config_loader import constants
+from glob import glob
+import shutil,os
+from tqdm import tqdm
+import cv2
+
+
+
+# write the dataset download and preparation code in this file
+# Note: download_prepare() is a specially chosen name, so don't change this function's name
+# you can add, remove and change any other function in this file
+
+def download_prepare():
+ """ function used to download dataset and apply
+    all the data preprocessing required to prepare the dataset
+ """
+ download_dataset()
+ unzip_dataset()
+ clean_dataset()
+
+
+def download_dataset():
+ """Used to download dataset from hugging face"""
+    print_title("Downloading forests dataset from hugging face")
+ # download_hf_dataset("")
+ download_personal_hf_dataset("forests")
+
+
+
+def unzip_dataset():
+    print_title("Unzipping dataset")
+
+ unzip_file(constants.RAW_DATASET_DIR/Path("forests/trainval.zip"),
+ constants.INTERIM_DATASET_DIR/Path("trainval/"))
+
+ unzip_file(constants.RAW_DATASET_DIR/Path("forests/test.zip"),
+ constants.INTERIM_DATASET_DIR/Path("test/"))
+
+
+
+def clean_dataset():
+ print_title("CLEANING DATASET")
+ trainval_dir = constants.INTERIM_DATASET_DIR / Path("trainval/")
+ test_dir = constants.INTERIM_DATASET_DIR / Path("test/")
+
+ trainval_paths = glob(str(trainval_dir/Path("*")))
+ test_paths = glob(str(test_dir/Path("*")))
+
+ print("train,test: ",len(trainval_paths),",",len(test_paths),sep="")
+
+
+ def clean(image_paths,destination_dir):
+ if os.path.exists(destination_dir): shutil.rmtree(destination_dir)
+ os.makedirs(destination_dir)
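+        # keep only colour images; grayscale images are filtered out via is_bw()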
+ for i in tqdm(range(len(image_paths))):
+ img = cv2.imread(image_paths[i])
+ img = cv2.resize(img,[128,128])
+ if not is_bw(img):
+                shutil.copy(image_paths[i],
+ destination_dir)
+ print("saved to:",destination_dir)
+
+ destination_dir = constants.PROCESSED_DATASET_DIR/Path("trainval/")
+ clean(trainval_paths,destination_dir)
+
+ destination_dir = constants.PROCESSED_DATASET_DIR/Path("test/")
+ clean(test_paths,destination_dir)
+
+ trainval_dir = constants.PROCESSED_DATASET_DIR / Path("trainval/")
+ test_dir = constants.PROCESSED_DATASET_DIR / Path("test/")
+
+ trainval_paths = glob(str(trainval_dir/Path("*")))
+ test_paths = glob(str(test_dir/Path("*")))
+
+ print("after cleaning train,test: ",len(trainval_paths),",",len(test_paths),sep="")
+
diff --git a/src/simple_regression_colorization/data/register_datasets.py b/src/simple_regression_colorization/data/register_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb4b7cce983777d854779a58dcd9127a4a69530d
--- /dev/null
+++ b/src/simple_regression_colorization/data/register_datasets.py
@@ -0,0 +1,4 @@
+# register your datasets here
+
+datasets = ["forests"]
+
diff --git a/src/simple_regression_colorization/data/visualize_dataset.py b/src/simple_regression_colorization/data/visualize_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..5edc42b42a9c89f550259f72f31707f9cad26f69
--- /dev/null
+++ b/src/simple_regression_colorization/data/visualize_dataset.py
@@ -0,0 +1,21 @@
+from src.utils.data_utils import show_images_from_paths
+from src.utils.config_loader import constants,config
+from glob import glob
+import numpy as np
+
+# the data is at constants.PROCESSED_DATASET_DIR/trainval
+# constants.PROCESSED_DATASET_DIR/test
+
+def visualize():
+ n = 16
+ image_paths = glob(f"{constants.PROCESSED_DATASET_DIR}/trainval/*")
+    chosen_paths = np.random.choice(image_paths,n)
+    show_images_from_paths(chosen_paths,
+ title="sample of train_val dataset",
+ image_size=config.image_size)
+
+ image_paths = glob(f"{constants.PROCESSED_DATASET_DIR}/test/*")
+    chosen_paths = np.random.choice(image_paths,n)
+    show_images_from_paths(chosen_paths,
+ title="sample of test dataset",
+ image_size=config.image_size)
\ No newline at end of file
diff --git a/src/simple_regression_colorization/model/base_model_interface.py b/src/simple_regression_colorization/model/base_model_interface.py
new file mode 100644
index 0000000000000000000000000000000000000000..9356c209c90976c602c774cd103cf52c4ea774b7
--- /dev/null
+++ b/src/simple_regression_colorization/model/base_model_interface.py
@@ -0,0 +1,14 @@
+import numpy as np
+from abc import ABC, abstractmethod
+
+# BaseModel Abstract class
+# all the models within this sub_task must inherit this class
+
+class BaseModel(ABC):
+ @abstractmethod
+ def train(self):
+ pass
+
+ @abstractmethod
+ def predict(self,inputs):
+ pass
diff --git a/src/simple_regression_colorization/model/callbacks.py b/src/simple_regression_colorization/model/callbacks.py
new file mode 100644
index 0000000000000000000000000000000000000000..3eb1a6e02463d1fcfd4e4fcb47275cceb2a485f0
--- /dev/null
+++ b/src/simple_regression_colorization/model/callbacks.py
@@ -0,0 +1 @@
+# define callbacks here
diff --git a/src/data/load_dataset.py b/src/simple_regression_colorization/model/dataloaders.py
similarity index 61%
rename from src/data/load_dataset.py
rename to src/simple_regression_colorization/model/dataloaders.py
index 1aefc806c95bb47c12f452c369ef3ac0aa11bd47..bb42ed0e3adf559ef2060f675fc63eb2e9201bdc 100644
--- a/src/data/load_dataset.py
+++ b/src/simple_regression_colorization/model/dataloaders.py
@@ -1,15 +1,14 @@
-import os,sys;sys.path.append(os.getcwd())
import tensorflow as tf
-from src import config
-from src.utils import *
+from src.utils.data_utils import scale_L,scale_AB,rescale_AB,rescale_L
+from src.utils.config_loader import config, constants
from pathlib import Path
from glob import glob
import sklearn.model_selection
from skimage.color import rgb2lab, lab2rgb
def get_datasets():
- trainval_dir = Path(config.processed_dataset_dir) / Path("trainval/")
- test_dir = Path(config.processed_dataset_dir) / Path("test/")
+    trainval_dir = constants.PROCESSED_DATASET_DIR / Path("trainval/")
+    test_dir = constants.PROCESSED_DATASET_DIR / Path("test/")
trainval_paths = glob(str(trainval_dir/Path("*")))
test_paths = glob(str(test_dir/Path("*")))
@@ -22,26 +21,15 @@ def get_datasets():
train_size=0.8,
random_state=324)
- print("train|val split:",len(train_paths),"|",len(val_paths))
-
- train_ds = get_ds(train_paths,bs=config.batch_size,shuffle=config.shuffle)
- val_ds = get_ds(val_paths,bs=config.batch_size,shuffle=False,is_val=True)
- test_ds = get_ds(test_paths,bs=config.batch_size,shuffle=False,is_val=True)
+ print("train|val|test:",len(train_paths),"|",len(val_paths),"|",len(test_paths))
+
+ train_ds = get_tf_ds(train_paths,bs=config.batch_size,shuffle=config.shuffle)
+ val_ds = get_tf_ds(val_paths,bs=config.batch_size,shuffle=False,is_val=True)
+ test_ds = get_tf_ds(test_paths,bs=config.batch_size,shuffle=False,is_val=True)
return train_ds,val_ds,test_ds
-# def test_dataset():
-# train_ds = get_ds(train_paths,shuffle=False)
-# L_batch,AB_batch = next(iter(train_ds))
-# L_batch = L_batch.numpy()
-# AB_batch = AB_batch.numpy()
-# print("L:",L_batch.min(),L_batch.max())
-# print("A:",AB_batch[:,:,:,0].min(),AB_batch[:,:,:,0].max())
-# print("B:",AB_batch[:,:,:,1].min(),AB_batch[:,:,:,1].max())
-
-
-
def tf_RGB_TO_LAB(image):
def f(image):
image = rgb2lab(image)
@@ -63,7 +51,7 @@ def load_img(img_path):
L,AB = scale_L(L),scale_AB(AB)
return L,AB
-def get_ds(image_paths,bs=8,shuffle=False,is_val=False):
+def get_tf_ds(image_paths,bs=8,shuffle=False,is_val=False):
ds = tf.data.Dataset.from_tensor_slices(image_paths)
if shuffle: ds = ds.shuffle(len(image_paths))
ds = ds.map(load_img,num_parallel_calls=tf.data.AUTOTUNE)
diff --git a/src/simple_regression_colorization/model/losses.py b/src/simple_regression_colorization/model/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cff477946cca87b5f3d1504dcb578c565e16e61
--- /dev/null
+++ b/src/simple_regression_colorization/model/losses.py
@@ -0,0 +1 @@
+# define loss functions here
diff --git a/src/simple_regression_colorization/model/metrics.py b/src/simple_regression_colorization/model/metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..00ff0c273bd6e4935498e15d432a2dd18ccf7b3c
--- /dev/null
+++ b/src/simple_regression_colorization/model/metrics.py
@@ -0,0 +1,8 @@
+# define metrics here
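+# a minimal sketch, assuming y_true/y_pred are the scaled AB channels produced by
+# dataloaders.py; mean absolute error is a simple, interpretable starting metric.
+import tensorflow as tf
+
+def ab_mae(y_true, y_pred):
+    """Hypothetical metric: mean absolute error over the predicted AB channels."""
+    return tf.reduce_mean(tf.abs(y_true - y_pred))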
diff --git a/src/simple_regression_colorization/model/models/model_v1.py b/src/simple_regression_colorization/model/models/model_v1.py
new file mode 100644
index 0000000000000000000000000000000000000000..b94dcb0c52964a14b9f850d330a2d9552b951949
--- /dev/null
+++ b/src/simple_regression_colorization/model/models/model_v1.py
@@ -0,0 +1,41 @@
+from src.simple_regression_colorization.model.base_model_interface import BaseModel
+from src.simple_regression_colorization.model.dataloaders import get_datasets
+
+class Model(BaseModel):
+
+ def __init__(self):
+ # make model architecture
+ # load weights (optional)
+ # create dataset loaders
+ # train
+ # predict
+ self.init_model()
+ self.load_weights()
+ self.prepare_data()
+
+
+ def init_model(self):
+ pass
+
+ def load_weights(self,path=None):
+ pass
+
+ def prepare_data(self):
+ self.train_ds,self.val_ds,self.test_ds = get_datasets()
+
+ def train(self):
+ pass
+
+ def predict(self,inputs):
+ pass
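+
+# a minimal sketch (illustrative only, not this project's actual architecture) of what
+# init_model might build, assuming `import tensorflow as tf` and the config's image_size:
+# a small Conv2D encoder-decoder mapping the L channel (H, W, 1) to the two AB channels
+# (H, W, 2), with tanh outputs to match the [-1, 1] AB scaling used in data_utils:
+#
+#   inputs = tf.keras.Input((config.image_size, config.image_size, 1))
+#   x = tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu")(inputs)
+#   x = tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu")(x)
+#   outputs = tf.keras.layers.Conv2D(2, 3, padding="same", activation="tanh")(x)
+#   self.model = tf.keras.Model(inputs, outputs)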
diff --git a/src/simple_regression_colorization/model/register_models.py b/src/simple_regression_colorization/model/register_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..adf0c77e6518596339ea51392cdad1e7c3e93df9
--- /dev/null
+++ b/src/simple_regression_colorization/model/register_models.py
@@ -0,0 +1,2 @@
+# register models of this sub_task here
+models = ["model_v1"]
diff --git a/src/simple_regression_colorization/scripts/create_dataset.py b/src/simple_regression_colorization/scripts/create_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6cd89e683dd354b9191e93859edc495532246c6
--- /dev/null
+++ b/src/simple_regression_colorization/scripts/create_dataset.py
@@ -0,0 +1,67 @@
+import os,shutil
+import argparse
+
+def create_file(file_path,file_content):
+ with open(file_path,"w") as f:
+ f.write(file_content)
+
+def create_dataset(args):
+ dataset_name = args.name
+ force_flag = args.force
+ datasets_dir = os.path.join('src','simple_regression_colorization','data','datasets')
+
+ os.makedirs(datasets_dir,exist_ok=True)
+ dataset_path = os.path.join(datasets_dir,dataset_name+".py")
+
+    # delete the old dataset if the force flag is set and the dataset already exists
+    if os.path.exists(dataset_path):
+        if force_flag:
+            print("Replacing existing dataset:",dataset_name)
+            os.remove(dataset_path)
+        else:
+            print(f"{dataset_name} already exists, use the --force flag if you want to reset it to default")
+ exit()
+
+
+ create_file(dataset_path,
+"""# write dataset downloading preparation code in this file
+# Note: download_prepare() this is specially choosen name so don't change this function's name
+# you can add, remove and change any other function from this file
+
+def download_prepare():
+ \"\"\" function used to download dataset and apply
+ all type of data preprocessing required to prepare the dataset
+ \"\"\"
+ download_dataset()
+ unzip_dataset()
+ clean_dataset()
+ move_dataset()
+
+
+def download_dataset():
+ \"\"\"download dataset\"\"\"
+ pass
+
+def unzip_dataset():
+ \"\"\"unzip dataset(if required)\"\"\"
+ pass
+
+def clean_dataset():
+ \"\"\"clean dataset(if required)\"\"\"
+ pass
+
+def move_dataset():
+ \"\"\"move dataset to processed folder\"\"\"
+ pass
+""")
+
+def main():
+ parser = argparse.ArgumentParser(description="Create blueprint dataset")
+ parser.add_argument('name',type=str,help="name of dataset (e.g., pascal-voc)")
+ parser.add_argument("--force",action="store_true",help="forcefully replace old existing dataset to default",default=False)
+ args = parser.parse_args()
+ create_dataset(args)
+
+if __name__=="__main__":
+ main()
+
diff --git a/src/simple_regression_colorization/scripts/create_model.py b/src/simple_regression_colorization/scripts/create_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..758ce389624616f78e20a3a8d3f05d378642eefb
--- /dev/null
+++ b/src/simple_regression_colorization/scripts/create_model.py
@@ -0,0 +1,46 @@
+import os,shutil
+import argparse
+
+def create_file(file_path,file_content):
+ with open(file_path,"w") as f:
+ f.write(file_content)
+
+def create_model(args):
+ model_name = args.name
+ force_flag = args.force
+ models_dir = os.path.join('src','simple_regression_colorization','model',"models")
+ os.makedirs(models_dir,exist_ok=True)
+ model_path = os.path.join(models_dir,model_name+".py")
+
+    # delete the old model if the force flag is set and the model already exists
+    if os.path.exists(model_path):
+        if force_flag:
+            print("Replacing existing model:",model_name)
+            os.remove(model_path)
+        else:
+            print(f"{model_name} already exists, use the --force flag if you want to reset it to default")
+ exit()
+
+
+    # the generated class is always named "Model", matching models/model_v1.py
+ create_file(model_path,
+f"""from src.simple_regression_colorization.model.base_model_interface import BaseModel
+
+class Model(BaseModel):
+ def train(self):
+ pass
+
+ def predict(self,inputs):
+ pass
+""")
+
+def main():
+ parser = argparse.ArgumentParser(description="Create blueprint model")
+ parser.add_argument('name',type=str,help="name of model (e.g., model_v2)")
+ parser.add_argument("--force",action="store_true",help="forcefully replace old existing model to default",default=False)
+ args = parser.parse_args()
+ create_model(args)
+
+if __name__=="__main__":
+ main()
+
diff --git a/src/simple_regression_colorization/validate_config.py b/src/simple_regression_colorization/validate_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..8651ea1125369fbdad1a68c73854be1beba5c370
--- /dev/null
+++ b/src/simple_regression_colorization/validate_config.py
@@ -0,0 +1,26 @@
+# from cerberus import Validator
+
+# write config file schema here
+# based on cerberus Validator
+
+schema = {
+ "seed": {
+ "type": "integer",
+ },
+ "image_size": {"type": "integer", "required": True},
+ "train_size": {"type": "float", "required": True},
+ "shuffle": {"type": "boolean", "required": True},
+ "batch_size": {
+ "type": "integer",
+ "required": True,
+ },
+ "epochs": {
+ "type": "integer",
+ "required": True,
+ },
+}
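+# a hypothetical example of a config dict that satisfies this schema
+# (values are illustrative only):
+#   {"seed": 42, "image_size": 64, "train_size": 0.8,
+#    "shuffle": True, "batch_size": 32, "epochs": 10}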
+
diff --git a/src/utils.py b/src/utils.py
deleted file mode 100644
index 0167d29cd6db988f0f35255270c064dbfbc22214..0000000000000000000000000000000000000000
--- a/src/utils.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import yaml
-import numpy as np
-
-class Config:
- def __init__(self,path="config.yaml"):
- with open(path,'r') as f:
- self.config = yaml.safe_load(f)
-
- def __str__(self):
- return str(self.config)
-
- def __getattr__(self, name: str):
- return self.config.get(name)
-
- # def __setattr__(self, name: str, value: any):
- # self.config[name]=value
-
-def is_bw(img):
- rg,gb,rb = img[:,:,0]-img[:,:,1] , img[:,:,1]-img[:,:,2] , img[:,:,0]-img[:,:,2]
- rg,gb,rb = np.abs(rg).sum(),np.abs(gb).sum(),np.abs(rb).sum()
- avg = np.mean([rg,gb,rb])
- # print(rg,gb,rb)
-
- return avg<10
-
-def print_title(msg:str,n=30):
- print("="*n,msg.upper(),"="*n,sep="")
-
-def scale_L(L):
- return L/100
-def rescale_L(L):
- return L*100
-
-def scale_AB(AB):
- return AB/128
-
-def rescale_AB(AB):
- return AB*128
-
\ No newline at end of file
diff --git a/src/utils/config_loader.py b/src/utils/config_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cf751ae71e95289360dddb9bd71ab7c26af71e5
--- /dev/null
+++ b/src/utils/config_loader.py
@@ -0,0 +1,26 @@
+import yaml
+from pathlib import Path
+
+class Config:
+ def __init__(self,config_file_path:str):
+ """loads config from config_file_path"""
+ with open(config_file_path,"r") as f:
+ self.config_dict = yaml.safe_load(f)
+
+ def __str__(self):
+ return str(self.config_dict)
+
+ def __getattr__(self,name):
+ return self.config_dict.get(name)
+
+
+# exports constants
+constants = Config("constants.yaml")
+constants.config_dict['RAW_DATASET_DIR'] = Path(constants.config_dict['RAW_DATASET_DIR'])
+constants.config_dict['INTERIM_DATASET_DIR'] = Path(constants.config_dict['INTERIM_DATASET_DIR'])
+constants.config_dict['PROCESSED_DATASET_DIR'] = Path(constants.config_dict['PROCESSED_DATASET_DIR'])
+
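+# `config` is intentionally None at import time; the assumption is that the entry-point
+# script loads the experiment config (e.g. Config("config.yaml")) and assigns it to this
+# module attribute before the dataloader/model code reads it.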
+config = None
\ No newline at end of file
diff --git a/src/utils/data_utils.py b/src/utils/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..835be7cc711c7586d0c50a1fc2afd46953260941
--- /dev/null
+++ b/src/utils/data_utils.py
@@ -0,0 +1,80 @@
+from src.utils.config_loader import constants
+from huggingface_hub import snapshot_download
+from zipfile import ZipFile
+import numpy as np
+import shutil
+import matplotlib.pyplot as plt
+import cv2
+import math
+
+
+def download_hf_dataset(repo_id,allow_patterns=None):
+ """Used to download dataset from any public hugging face dataset"""
+ snapshot_download(repo_id=repo_id,
+ repo_type="dataset",
+ local_dir=constants.RAW_DATASET_DIR,
+ allow_patterns=allow_patterns)
+
+
+def download_personal_hf_dataset(name):
+ """Used to download dataset from a specific hugging face dataset"""
+ download_hf_dataset(repo_id="Anuj-Panthri/Image-Colorization-Datasets",
+ allow_patterns=f"{name}/*")
+
+
+def unzip_file(file_path,destination_dir):
+ """unzips file to destination_dir"""
+    shutil.rmtree(destination_dir,ignore_errors=True)  # clear any previous extraction
+    with ZipFile(file_path,"r") as zip_file:
+        zip_file.extractall(destination_dir)
+
+def is_bw(img:np.ndarray):
+ """checks if RGB image is black and white"""
+    img = img.astype(np.int32)  # avoid uint8 wrap-around when differencing channels
+    rg,gb,rb = img[:,:,0]-img[:,:,1] , img[:,:,1]-img[:,:,2] , img[:,:,0]-img[:,:,2]
+ rg,gb,rb = np.abs(rg).sum(),np.abs(gb).sum(),np.abs(rb).sum()
+ avg = np.mean([rg,gb,rb])
+
+ return avg<10
+
+
+def print_title(msg:str,max_chars=105):
+ n = (max_chars-len(msg))//2
+ print("="*n,msg.upper(),"="*n,sep="")
+
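+# scaling helpers: skimage's rgb2lab returns L in [0, 100] and A/B within roughly
+# [-128, 127], so dividing by 100 and 128 maps the channels to ~[0,1] and ~[-1,1].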
+def scale_L(L):
+ return L/100
+
+def rescale_L(L):
+ return L*100
+
+def scale_AB(AB):
+ return AB/128
+
+def rescale_AB(AB):
+ return AB*128
+
+
+
+def show_images_from_paths(image_paths:list[str],image_size=64,cols=4,row_size=5,col_size=5,show_BW=False,title=None):
+ n = len(image_paths)
+ rows = math.ceil(n/cols)
+ fig = plt.figure(figsize=(col_size*cols,row_size*rows))
+ if title:
+ plt.title(title)
+ plt.axis("off")
+
+ for i in range(n):
+ fig.add_subplot(rows,cols,i+1)
+
+        img = cv2.cvtColor(cv2.imread(image_paths[i]),cv2.COLOR_BGR2RGB)  # read BGR, convert to RGB
+ img = cv2.resize(img,[image_size,image_size])
+
+ if show_BW:
+ BW = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
+            BW = np.tile(BW[:,:,None],(1,1,3))  # (H,W) grayscale -> (H,W,3)
+ img = np.concatenate([BW,img],axis=1)
+ plt.imshow(img.astype("uint8"))
+ plt.show()
diff --git a/src/utils/script_utils.py b/src/utils/script_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0bab5d008a133c689e35444ef1c90b4503e85816
--- /dev/null
+++ b/src/utils/script_utils.py
@@ -0,0 +1,51 @@
+from cerberus import Validator
+import importlib
+import os
+
+def validate_config(config):
+ basic_schema = {
+ "task": {
+ "type":"string",
+ "required":True
+ },
+ "dataset": {
+ "type":"string",
+ "required":True
+ },
+ "model": {
+ "type":"string",
+ "required":True
+ },
+ }
+ basic_v = Validator(basic_schema,allow_unknown=True)
+
+ if not basic_v.validate(config.config_dict):
+ raise Exception(f"Invalid config file:",basic_v.errors)
+
+ # check if such task exists
+ if not os.path.exists(os.path.join("src",config.task)):
+ raise Exception("Invalid config file:",f"no such task {config.task}")
+
+ # check if valid dataset
+ all_datasets = importlib.import_module(f"src.{config.task}.data.register_datasets").datasets
+ if config.dataset not in all_datasets:
+ raise Exception("Invalid config file:",f"no {config.dataset} dataset found in registered datasets: {all_datasets}")
+
+ # check if valid model
+ all_models = importlib.import_module(f"src.{config.task}.model.register_models").models
+ if config.model not in all_models:
+ raise Exception("Invalid config file:",f"no {config.model} model found in registered models: {all_models}")
+
+
+
+ # check the sub_task's validate_config schema
+ task_schema = importlib.import_module(f"src.{config.task}.validate_config").schema
+ sub_task_v = Validator(task_schema,allow_unknown=True)
+
+ if not sub_task_v.validate(config.config_dict):
+ raise Exception(f"Invalid config file:",sub_task_v.errors)
+
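+# hypothetical usage from a training entry point (names are illustrative):
+#   from src.utils.config_loader import Config
+#   config = Config("config.yaml")
+#   validate_config(config)  # raises with the cerberus errors if the config is invalid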
diff --git a/src/visualization/.gitkeep b/src/visualization/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/visualization/__init__.py b/src/visualization/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/src/visualization/visualize.py b/src/visualization/visualize.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/test_environment.py b/test_environment.py
deleted file mode 100644
index cef7e6c71c9d251bdd8034c3b8c324f445ecc525..0000000000000000000000000000000000000000
--- a/test_environment.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import sys
-
-REQUIRED_PYTHON = "python"
-
-
-def main():
- system_major = sys.version_info.major
- if REQUIRED_PYTHON == "python":
- required_major = 2
- elif REQUIRED_PYTHON == "python3":
- required_major = 3
- else:
- raise ValueError("Unrecognized python interpreter: {}".format(
- REQUIRED_PYTHON))
-
- if system_major != required_major:
- raise TypeError(
- "This project requires Python {}. Found: Python {}".format(
- required_major, sys.version))
- else:
- print(">>> Development environment passes all tests!")
-
-
-if __name__ == '__main__':
- main()
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index c32fbd859bf4cc4f5e2e65690c00889b54683af7..0000000000000000000000000000000000000000
--- a/tox.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-[flake8]
-max-line-length = 79
-max-complexity = 10