diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..1ef8dbc2a4c5d95961183bfe5e0db7a7443ef475 --- /dev/null +++ b/.gitignore @@ -0,0 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + + +models \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..61256718471d855eff45b6415015bc297bf00c18 --- /dev/null +++ b/app.py @@ -0,0 +1,98 @@ +import glob +import os + +import streamlit as st +from datastore import ChromaStore +from embeddings import Embedding +from PIL import Image +from tqdm import tqdm + +from utils import base64_to_image, image_to_base64 + +##### Image database +root_dir = os.path.join(os.getcwd(), "data") +jpg_files = glob.glob(os.path.join(root_dir, "**", "*.jpg"), recursive=True) +IMAGE_DATABASE = [Image.open(f).resize((224, 224)) for f in jpg_files] + + +def display_image_database(): + image_database_expander = st.expander(label="Image Database") + with image_database_expander: + st.image(IMAGE_DATABASE) + + +def display_sample_images(): + sample_img_path = os.path.join(os.getcwd(), "sample_imgs") + sample_images = os.listdir(sample_img_path) + + images = [] + for i, img in enumerate(sample_images): + images.append(Image.open(os.path.join(sample_img_path, img)).resize((224, 224))) + + st.image(images) + + +def main(): + st.set_page_config(page_icon="🖼️", page_title="image-search-engine", layout="wide") + st.markdown( + """

🔍️ Image Search Engine

""", + unsafe_allow_html=True, + ) + st.markdown( + """

Image to Image search using transformer embeddings

""", + unsafe_allow_html=True, + ) + + main_layout = st.columns(2) + + with main_layout[0]: + with st.container(border=True, height=550): + st.markdown( + """

Search

""", + unsafe_allow_html=True, + ) + upload_img = st.file_uploader( + label="Query Image", + accept_multiple_files=False, + type=["jpg", "png", "jpeg"], + ) + + submit = st.button(label="Submit") + display_sample_images() + + with main_layout[1]: + with st.container(border=True, height=550): + st.markdown( + """

Results

""", + unsafe_allow_html=True, + ) + top_k = st.slider(label="Search top k results", min_value=3, max_value=10) + if submit and upload_img: + ## encode uplaoded img + query_embedding = Embedding.encode_image(Image.open(upload_img)) + ## query vectorstore + vectorstore = ChromaStore(collection_name="image_store") + collection = vectorstore.create() + # print(collection) + # print(vectorstore.collection_info(collection)) + st.toast("Vectorstore loaded successfully", icon="✅") + results = vectorstore.query( + collection, + query_embedding, + top_k=top_k, + ) + ## show results + + res_images = [] + for res in tqdm(results, desc="Results"): + res_images.append(res[0]) + + st.image(res_images) + else: + st.warning("Please upload an image") + + display_image_database() + + +if __name__ == "__main__": + main() diff --git a/data/airplane/airplane_0000.jpg b/data/airplane/airplane_0000.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f2d88341e6a0aa64176ab5681f911401682ad769 Binary files /dev/null and b/data/airplane/airplane_0000.jpg differ diff --git a/data/airplane/airplane_0001.jpg b/data/airplane/airplane_0001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..522ff6a996ab81cfa1719c0346e3b99d9943a4cb Binary files /dev/null and b/data/airplane/airplane_0001.jpg differ diff --git a/data/airplane/airplane_0002.jpg b/data/airplane/airplane_0002.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b788048e1b477686e563d93cb278369c07719bc6 Binary files /dev/null and b/data/airplane/airplane_0002.jpg differ diff --git a/data/airplane/airplane_0003.jpg b/data/airplane/airplane_0003.jpg new file mode 100644 index 0000000000000000000000000000000000000000..16f034eea7fe872a1984e66313eeb4c78be4b6af Binary files /dev/null and b/data/airplane/airplane_0003.jpg differ diff --git a/data/airplane/airplane_0004.jpg b/data/airplane/airplane_0004.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..cccf57a5c3657502a698ce18deca9d17383fc2a4 Binary files /dev/null and b/data/airplane/airplane_0004.jpg differ diff --git a/data/airplane/airplane_0005.jpg b/data/airplane/airplane_0005.jpg new file mode 100644 index 0000000000000000000000000000000000000000..363b2dc3a058aa464a9556d42e83e91acc1ec6b5 Binary files /dev/null and b/data/airplane/airplane_0005.jpg differ diff --git a/data/airplane/airplane_0006.jpg b/data/airplane/airplane_0006.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8de6cbee9b4c88111f73b7cb72321ac03cbca982 Binary files /dev/null and b/data/airplane/airplane_0006.jpg differ diff --git a/data/airplane/airplane_0007.jpg b/data/airplane/airplane_0007.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1c6c4368866b1f053d776f1401eccbb84025eff3 Binary files /dev/null and b/data/airplane/airplane_0007.jpg differ diff --git a/data/airplane/airplane_0008.jpg b/data/airplane/airplane_0008.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6d8f8828d3bb2053e1ba6d3925ba187cd8f48857 Binary files /dev/null and b/data/airplane/airplane_0008.jpg differ diff --git a/data/airplane/airplane_0009.jpg b/data/airplane/airplane_0009.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2f62d49ce66691644f68c4c7a188b55906145819 Binary files /dev/null and b/data/airplane/airplane_0009.jpg differ diff --git a/data/airplane/airplane_0010.jpg b/data/airplane/airplane_0010.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fed84dea7c2aae3cbaa8d40e89819f182b6d2902 Binary files /dev/null and b/data/airplane/airplane_0010.jpg differ diff --git a/data/airplane/airplane_0011.jpg b/data/airplane/airplane_0011.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e490edd2439da3f0eb41e8d263a0493e0c67ee10 Binary files /dev/null and b/data/airplane/airplane_0011.jpg differ diff --git a/data/airplane/airplane_0012.jpg 
b/data/airplane/airplane_0012.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cf172f813c2181569b7be65ca774b60c7950dd44 Binary files /dev/null and b/data/airplane/airplane_0012.jpg differ diff --git a/data/airplane/airplane_0013.jpg b/data/airplane/airplane_0013.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6e31625c63029fe1c3b93c46b2a6f4f8c8b0876a Binary files /dev/null and b/data/airplane/airplane_0013.jpg differ diff --git a/data/airplane/airplane_0014.jpg b/data/airplane/airplane_0014.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b8c9bbb12a87aaffa63ce675991809b0a1aaa0fa Binary files /dev/null and b/data/airplane/airplane_0014.jpg differ diff --git a/data/airplane/airplane_0015.jpg b/data/airplane/airplane_0015.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e70ceace3b43d3f4eaf281df8a69a48978101de6 Binary files /dev/null and b/data/airplane/airplane_0015.jpg differ diff --git a/data/airplane/airplane_0016.jpg b/data/airplane/airplane_0016.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ebf07561c8174ac5b16930c6c5148d2f2abaf820 Binary files /dev/null and b/data/airplane/airplane_0016.jpg differ diff --git a/data/airplane/airplane_0017.jpg b/data/airplane/airplane_0017.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5c03258b5b78b866f7d2bc489b61d48e40051014 Binary files /dev/null and b/data/airplane/airplane_0017.jpg differ diff --git a/data/airplane/airplane_0018.jpg b/data/airplane/airplane_0018.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9af8b6637d53e85d680150a69dd9c05a9aec1959 Binary files /dev/null and b/data/airplane/airplane_0018.jpg differ diff --git a/data/airplane/airplane_0019.jpg b/data/airplane/airplane_0019.jpg new file mode 100644 index 0000000000000000000000000000000000000000..68dd30a141d061c45d5a76d7999afa560fbe57a0 Binary files /dev/null and 
b/data/airplane/airplane_0019.jpg differ diff --git a/data/airplane/airplane_0020.jpg b/data/airplane/airplane_0020.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6d3c91c95522dc4b5ee9a85e836d7008487027ba Binary files /dev/null and b/data/airplane/airplane_0020.jpg differ diff --git a/data/airplane/airplane_0021.jpg b/data/airplane/airplane_0021.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ca02e48c8438ecf3396ad0f29d9e8d37e73f2de7 Binary files /dev/null and b/data/airplane/airplane_0021.jpg differ diff --git a/data/airplane/airplane_0022.jpg b/data/airplane/airplane_0022.jpg new file mode 100644 index 0000000000000000000000000000000000000000..64c91bea0d44251e365c1d85ca08ac7e50bbcf3b Binary files /dev/null and b/data/airplane/airplane_0022.jpg differ diff --git a/data/airplane/airplane_0023.jpg b/data/airplane/airplane_0023.jpg new file mode 100644 index 0000000000000000000000000000000000000000..04cb476ac1b0c91bc484d2ab26dd511355d8c856 Binary files /dev/null and b/data/airplane/airplane_0023.jpg differ diff --git a/data/airplane/airplane_0024.jpg b/data/airplane/airplane_0024.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0a40fc8181639efb7529df3bd214ed4d0b63df8f Binary files /dev/null and b/data/airplane/airplane_0024.jpg differ diff --git a/data/airplane/airplane_0025.jpg b/data/airplane/airplane_0025.jpg new file mode 100644 index 0000000000000000000000000000000000000000..36aa5fa49ce93b221286afa38aeafa9c9641765a Binary files /dev/null and b/data/airplane/airplane_0025.jpg differ diff --git a/data/airplane/airplane_0026.jpg b/data/airplane/airplane_0026.jpg new file mode 100644 index 0000000000000000000000000000000000000000..84af9ed3b4fbb427a9a82cb8b3c338e11447152a Binary files /dev/null and b/data/airplane/airplane_0026.jpg differ diff --git a/data/airplane/airplane_0027.jpg b/data/airplane/airplane_0027.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..65a6345c9d89775ab15679d96d7ab2f95602c6bd Binary files /dev/null and b/data/airplane/airplane_0027.jpg differ diff --git a/data/airplane/airplane_0028.jpg b/data/airplane/airplane_0028.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1c8770b53fdea63c500d0b616546cd80f5662469 Binary files /dev/null and b/data/airplane/airplane_0028.jpg differ diff --git a/data/airplane/airplane_0029.jpg b/data/airplane/airplane_0029.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c0c42facf1b05073a798dad15acf7a3c072608d0 Binary files /dev/null and b/data/airplane/airplane_0029.jpg differ diff --git a/data/airplane/airplane_0030.jpg b/data/airplane/airplane_0030.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cf301a765170070016abeb1806a23c0c4ab4cd07 Binary files /dev/null and b/data/airplane/airplane_0030.jpg differ diff --git a/data/car/car_0000.jpg b/data/car/car_0000.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1d4b8fec46b293315e9c3344876ef1b4bf457b2e Binary files /dev/null and b/data/car/car_0000.jpg differ diff --git a/data/car/car_0001.jpg b/data/car/car_0001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..68c803b878e6049c4972389eb432bc22bcc0a118 Binary files /dev/null and b/data/car/car_0001.jpg differ diff --git a/data/car/car_0002.jpg b/data/car/car_0002.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ded8d347d8d4c5def3344d803e39ceb059d164a5 Binary files /dev/null and b/data/car/car_0002.jpg differ diff --git a/data/car/car_0003.jpg b/data/car/car_0003.jpg new file mode 100644 index 0000000000000000000000000000000000000000..21e44b6dd23716f04568b0a6094008f9c45f78d0 Binary files /dev/null and b/data/car/car_0003.jpg differ diff --git a/data/car/car_0004.jpg b/data/car/car_0004.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2eec2a25553493c7004146795d5d6afe3f56eac6 
Binary files /dev/null and b/data/car/car_0004.jpg differ diff --git a/data/car/car_0005.jpg b/data/car/car_0005.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4bcea137c788af2e6af822ebc1e2a53bff0ee11e Binary files /dev/null and b/data/car/car_0005.jpg differ diff --git a/data/car/car_0006.jpg b/data/car/car_0006.jpg new file mode 100644 index 0000000000000000000000000000000000000000..967ebd25e6e70e0761aee3a194ccf9ea402fd14e Binary files /dev/null and b/data/car/car_0006.jpg differ diff --git a/data/car/car_0007.jpg b/data/car/car_0007.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e53d1abcecbf236225ff001d8615948de3e3bc1e Binary files /dev/null and b/data/car/car_0007.jpg differ diff --git a/data/car/car_0008.jpg b/data/car/car_0008.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2c6c747f3bbe29124e27d7bc417a34a30c4a2b0b Binary files /dev/null and b/data/car/car_0008.jpg differ diff --git a/data/car/car_0009.jpg b/data/car/car_0009.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9c3cbd2cab6d283885b69872ac9eb4b94bf54482 Binary files /dev/null and b/data/car/car_0009.jpg differ diff --git a/data/car/car_0010.jpg b/data/car/car_0010.jpg new file mode 100644 index 0000000000000000000000000000000000000000..53c6413195852e54b08dd8c89a6a5d778b2747a7 Binary files /dev/null and b/data/car/car_0010.jpg differ diff --git a/data/car/car_0011.jpg b/data/car/car_0011.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9fe2d3aaef9caa27d4f15574b2f93a28235aa7e5 Binary files /dev/null and b/data/car/car_0011.jpg differ diff --git a/data/car/car_0012.jpg b/data/car/car_0012.jpg new file mode 100644 index 0000000000000000000000000000000000000000..499b3c0a5227c9c1c6eb56b1bbe71b1e44745c41 Binary files /dev/null and b/data/car/car_0012.jpg differ diff --git a/data/car/car_0013.jpg b/data/car/car_0013.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..23595db15ca9a4171b20c9603cd3eb2eb2ff09d6 Binary files /dev/null and b/data/car/car_0013.jpg differ diff --git a/data/car/car_0014.jpg b/data/car/car_0014.jpg new file mode 100644 index 0000000000000000000000000000000000000000..761d340c53026d482dd7a8e6dc80b21b8173c694 Binary files /dev/null and b/data/car/car_0014.jpg differ diff --git a/data/car/car_0015.jpg b/data/car/car_0015.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7a93fca2bcab18d70a8ded553af9181300274a4d Binary files /dev/null and b/data/car/car_0015.jpg differ diff --git a/data/car/car_0016.jpg b/data/car/car_0016.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d682ec1a4e757da66195c26b9ff5080e4695538d Binary files /dev/null and b/data/car/car_0016.jpg differ diff --git a/data/car/car_0017.jpg b/data/car/car_0017.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dbaf379e988e86936da3f99eccdd0e939b621682 Binary files /dev/null and b/data/car/car_0017.jpg differ diff --git a/data/car/car_0018.jpg b/data/car/car_0018.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e8e748285ed62e8881465774cffd49cf2ccf00ef Binary files /dev/null and b/data/car/car_0018.jpg differ diff --git a/data/car/car_0019.jpg b/data/car/car_0019.jpg new file mode 100644 index 0000000000000000000000000000000000000000..192a78c7fb49731179c74a7951505efee7cd2c10 Binary files /dev/null and b/data/car/car_0019.jpg differ diff --git a/data/car/car_0020.jpg b/data/car/car_0020.jpg new file mode 100644 index 0000000000000000000000000000000000000000..86cd044dedf03fd9db9e521bb33123a46b094563 Binary files /dev/null and b/data/car/car_0020.jpg differ diff --git a/data/car/car_0021.jpg b/data/car/car_0021.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7251d3f235e51d75c4a80b8e5a3f2b866e0b87c7 Binary files /dev/null and b/data/car/car_0021.jpg differ diff --git a/data/car/car_0022.jpg 
b/data/car/car_0022.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a38a13f15cf91d278466aef79f962f190a8d7d04 Binary files /dev/null and b/data/car/car_0022.jpg differ diff --git a/data/car/car_0023.jpg b/data/car/car_0023.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7a2dd412fa68e284e7895f6544b5ad08df19c813 Binary files /dev/null and b/data/car/car_0023.jpg differ diff --git a/data/car/car_0024.jpg b/data/car/car_0024.jpg new file mode 100644 index 0000000000000000000000000000000000000000..69d1719c9d5beff6c5aed26f779f5ed104a59bed Binary files /dev/null and b/data/car/car_0024.jpg differ diff --git a/data/car/car_0025.jpg b/data/car/car_0025.jpg new file mode 100644 index 0000000000000000000000000000000000000000..70cea083ef9a644658d49a27c1d71d05769e3906 Binary files /dev/null and b/data/car/car_0025.jpg differ diff --git a/data/car/car_0026.jpg b/data/car/car_0026.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b8a2bad9aa27c142640d1398dd7b464cacbddf07 Binary files /dev/null and b/data/car/car_0026.jpg differ diff --git a/data/car/car_0027.jpg b/data/car/car_0027.jpg new file mode 100644 index 0000000000000000000000000000000000000000..697813cbd928e913108848641c07b158ab73fb00 Binary files /dev/null and b/data/car/car_0027.jpg differ diff --git a/data/car/car_0028.jpg b/data/car/car_0028.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0621f9e316f9275a2a886a247017b39724dbe9f8 Binary files /dev/null and b/data/car/car_0028.jpg differ diff --git a/data/car/car_0029.jpg b/data/car/car_0029.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6a3a006fdeb3ea3666a2e968f091ad2d1a434caf Binary files /dev/null and b/data/car/car_0029.jpg differ diff --git a/data/car/car_0030.jpg b/data/car/car_0030.jpg new file mode 100644 index 0000000000000000000000000000000000000000..34ff55039424c788a30d54522aaded73b06db462 Binary files /dev/null and 
b/data/car/car_0030.jpg differ diff --git a/data/cat/cat_0000.jpg b/data/cat/cat_0000.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2ba05ad2f1a02a81f6df3723cceb01c543dec605 Binary files /dev/null and b/data/cat/cat_0000.jpg differ diff --git a/data/cat/cat_0001.jpg b/data/cat/cat_0001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1e6283f81f90c2016104e36d9979c25f65f2de95 Binary files /dev/null and b/data/cat/cat_0001.jpg differ diff --git a/data/cat/cat_0002.jpg b/data/cat/cat_0002.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2292b70dc44467875d2a3be5b5c318d2a8c713c3 Binary files /dev/null and b/data/cat/cat_0002.jpg differ diff --git a/data/cat/cat_0003.jpg b/data/cat/cat_0003.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7dc7b86f5442e75822a546fc847b15847cd07e7a Binary files /dev/null and b/data/cat/cat_0003.jpg differ diff --git a/data/cat/cat_0004.jpg b/data/cat/cat_0004.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c3db7a57fa20538f6e00f071a55166187ce81c5a Binary files /dev/null and b/data/cat/cat_0004.jpg differ diff --git a/data/cat/cat_0005.jpg b/data/cat/cat_0005.jpg new file mode 100644 index 0000000000000000000000000000000000000000..530ee13012e5cc112abc1f16607cbebab57c2c89 Binary files /dev/null and b/data/cat/cat_0005.jpg differ diff --git a/data/cat/cat_0006.jpg b/data/cat/cat_0006.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d9b00c32f55d52fdd52e99ac0db97a26bb3d0592 Binary files /dev/null and b/data/cat/cat_0006.jpg differ diff --git a/data/cat/cat_0007.jpg b/data/cat/cat_0007.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f1c140f715246064aacb7c2b94c5c8b1fc5f21df Binary files /dev/null and b/data/cat/cat_0007.jpg differ diff --git a/data/cat/cat_0008.jpg b/data/cat/cat_0008.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..72c4db0ab1c9df2eefb6eaede818f0b82ad3fcf1 Binary files /dev/null and b/data/cat/cat_0008.jpg differ diff --git a/data/cat/cat_0009.jpg b/data/cat/cat_0009.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b15cd7f753127eaac07c8c22c4fba6997b8c5944 Binary files /dev/null and b/data/cat/cat_0009.jpg differ diff --git a/data/cat/cat_0010.jpg b/data/cat/cat_0010.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ad9df77a263ced1208ec9b27efae123d589ca1ab Binary files /dev/null and b/data/cat/cat_0010.jpg differ diff --git a/data/cat/cat_0011.jpg b/data/cat/cat_0011.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d1eb04db2e195f1a3f5d373effebcfdbcd3e2bed Binary files /dev/null and b/data/cat/cat_0011.jpg differ diff --git a/data/cat/cat_0012.jpg b/data/cat/cat_0012.jpg new file mode 100644 index 0000000000000000000000000000000000000000..54d5cc192e994d2410e789e91e3472f3e3dfe50f Binary files /dev/null and b/data/cat/cat_0012.jpg differ diff --git a/data/cat/cat_0013.jpg b/data/cat/cat_0013.jpg new file mode 100644 index 0000000000000000000000000000000000000000..017dea0cac124db47ce0949df0300e7827670585 Binary files /dev/null and b/data/cat/cat_0013.jpg differ diff --git a/data/cat/cat_0014.jpg b/data/cat/cat_0014.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a3f96a95ece2dc7ca67df6742bb29a3e9b0b6080 Binary files /dev/null and b/data/cat/cat_0014.jpg differ diff --git a/data/cat/cat_0015.jpg b/data/cat/cat_0015.jpg new file mode 100644 index 0000000000000000000000000000000000000000..21bcd234d1cf22d17c5cf2910eed1239301e20ae Binary files /dev/null and b/data/cat/cat_0015.jpg differ diff --git a/data/cat/cat_0016.jpg b/data/cat/cat_0016.jpg new file mode 100644 index 0000000000000000000000000000000000000000..50c01ea5fa5a58f340a764b09d752f69ccb412a6 Binary files /dev/null and b/data/cat/cat_0016.jpg differ diff --git a/data/cat/cat_0017.jpg 
b/data/cat/cat_0017.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2565bd35223098626ea959ff10b5d9b494adcd23 Binary files /dev/null and b/data/cat/cat_0017.jpg differ diff --git a/data/cat/cat_0018.jpg b/data/cat/cat_0018.jpg new file mode 100644 index 0000000000000000000000000000000000000000..88c0d080e61136680e4dbfcbf2c5f69f342ade31 Binary files /dev/null and b/data/cat/cat_0018.jpg differ diff --git a/data/cat/cat_0019.jpg b/data/cat/cat_0019.jpg new file mode 100644 index 0000000000000000000000000000000000000000..246cbd14c781f6f97e540df2d4400bde97ee6011 Binary files /dev/null and b/data/cat/cat_0019.jpg differ diff --git a/data/cat/cat_0020.jpg b/data/cat/cat_0020.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3c6ad7bb6c3d3648b9855433c3fcb6874f34a05f Binary files /dev/null and b/data/cat/cat_0020.jpg differ diff --git a/data/cat/cat_0021.jpg b/data/cat/cat_0021.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f505b9cd2b1bb24062f2157b51b24e6856496fb3 Binary files /dev/null and b/data/cat/cat_0021.jpg differ diff --git a/data/cat/cat_0022.jpg b/data/cat/cat_0022.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d4de9d1555f5c9499d54e434663d79a433ff117c Binary files /dev/null and b/data/cat/cat_0022.jpg differ diff --git a/data/cat/cat_0023.jpg b/data/cat/cat_0023.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2b4e8a4c1ecf8a72f08150d713c83ee572f70d5f Binary files /dev/null and b/data/cat/cat_0023.jpg differ diff --git a/data/cat/cat_0024.jpg b/data/cat/cat_0024.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e7c37befa7ae7279f358428f3224fc5dc68cb744 Binary files /dev/null and b/data/cat/cat_0024.jpg differ diff --git a/data/cat/cat_0025.jpg b/data/cat/cat_0025.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c90ad554fc1b4bf84569835317a322f3905ee0b5 Binary files /dev/null and 
b/data/cat/cat_0025.jpg differ diff --git a/data/cat/cat_0026.jpg b/data/cat/cat_0026.jpg new file mode 100644 index 0000000000000000000000000000000000000000..02674c95a2bb796bcafa0592fd3ae04e564d7ad9 Binary files /dev/null and b/data/cat/cat_0026.jpg differ diff --git a/data/cat/cat_0027.jpg b/data/cat/cat_0027.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7c7a1c04cbce5b9ffc91729593f5c501f9c8ca48 Binary files /dev/null and b/data/cat/cat_0027.jpg differ diff --git a/data/cat/cat_0028.jpg b/data/cat/cat_0028.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cdcb92427f220a64e3e34c92bce049247605b998 Binary files /dev/null and b/data/cat/cat_0028.jpg differ diff --git a/data/cat/cat_0029.jpg b/data/cat/cat_0029.jpg new file mode 100644 index 0000000000000000000000000000000000000000..93dcda5f1f637ef0edbecebd12b38c7a16c5fec1 Binary files /dev/null and b/data/cat/cat_0029.jpg differ diff --git a/data/cat/cat_0030.jpg b/data/cat/cat_0030.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9419c7349bc5039696ee5bfc7e06f35465ad85e6 Binary files /dev/null and b/data/cat/cat_0030.jpg differ diff --git a/data/dog/dog_0000.jpg b/data/dog/dog_0000.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f99ca3290ba2b8149f3b93551394b94484b1f513 Binary files /dev/null and b/data/dog/dog_0000.jpg differ diff --git a/data/dog/dog_0001.jpg b/data/dog/dog_0001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b764ab270b47f80a517c8088d0b5ef1ea05b0618 Binary files /dev/null and b/data/dog/dog_0001.jpg differ diff --git a/data/dog/dog_0002.jpg b/data/dog/dog_0002.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2109bd45d68be3b2c20564e58ecde7237d481c2c Binary files /dev/null and b/data/dog/dog_0002.jpg differ diff --git a/data/dog/dog_0003.jpg b/data/dog/dog_0003.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..cce3f07b985305dcf7d63806889ecd2524873157 Binary files /dev/null and b/data/dog/dog_0003.jpg differ diff --git a/data/dog/dog_0004.jpg b/data/dog/dog_0004.jpg new file mode 100644 index 0000000000000000000000000000000000000000..72139c0b3d0da59c5000705d18d7403d3400aef1 Binary files /dev/null and b/data/dog/dog_0004.jpg differ diff --git a/data/dog/dog_0005.jpg b/data/dog/dog_0005.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a48108718f72aa47e9e314daac0519ff0c38e86a Binary files /dev/null and b/data/dog/dog_0005.jpg differ diff --git a/data/dog/dog_0006.jpg b/data/dog/dog_0006.jpg new file mode 100644 index 0000000000000000000000000000000000000000..73a6efd55002128c4a22df3dadcf69ec342493f6 Binary files /dev/null and b/data/dog/dog_0006.jpg differ diff --git a/data/dog/dog_0007.jpg b/data/dog/dog_0007.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7476c7cfe594cdaf42e39dcca05e5d50cf26ae06 Binary files /dev/null and b/data/dog/dog_0007.jpg differ diff --git a/data/dog/dog_0008.jpg b/data/dog/dog_0008.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b86555ab1265474d6b5a82f6296a86afe8ca9353 Binary files /dev/null and b/data/dog/dog_0008.jpg differ diff --git a/data/dog/dog_0009.jpg b/data/dog/dog_0009.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8d6b389a3255221e62eca07f627655933115c1d4 Binary files /dev/null and b/data/dog/dog_0009.jpg differ diff --git a/data/dog/dog_0010.jpg b/data/dog/dog_0010.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8202b97f5ce35d33fef9b66b2175020594bd529b Binary files /dev/null and b/data/dog/dog_0010.jpg differ diff --git a/data/dog/dog_0011.jpg b/data/dog/dog_0011.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ca8fa0a917d26d0b9cba1bf736da770ede8a4426 Binary files /dev/null and b/data/dog/dog_0011.jpg differ diff --git a/data/dog/dog_0012.jpg 
b/data/dog/dog_0012.jpg new file mode 100644 index 0000000000000000000000000000000000000000..93d119dc6b0aa47c6070bc3916b57cb9500e4ae9 Binary files /dev/null and b/data/dog/dog_0012.jpg differ diff --git a/data/dog/dog_0013.jpg b/data/dog/dog_0013.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b6bc3c7995c6c88f8505f14401c3855e9326d08f Binary files /dev/null and b/data/dog/dog_0013.jpg differ diff --git a/data/dog/dog_0014.jpg b/data/dog/dog_0014.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f30254e6f1a0810ba789cb48a860a36804437514 Binary files /dev/null and b/data/dog/dog_0014.jpg differ diff --git a/data/dog/dog_0015.jpg b/data/dog/dog_0015.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5d9d93a9975366715654620e9d92ae2f10699463 Binary files /dev/null and b/data/dog/dog_0015.jpg differ diff --git a/data/dog/dog_0016.jpg b/data/dog/dog_0016.jpg new file mode 100644 index 0000000000000000000000000000000000000000..95f123332dce0507e1fd2146f86752c6b59d2ab6 Binary files /dev/null and b/data/dog/dog_0016.jpg differ diff --git a/data/dog/dog_0017.jpg b/data/dog/dog_0017.jpg new file mode 100644 index 0000000000000000000000000000000000000000..697ad24b3cea3994432840a5ea4434084c4f44c4 Binary files /dev/null and b/data/dog/dog_0017.jpg differ diff --git a/data/dog/dog_0018.jpg b/data/dog/dog_0018.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1f88e28b643696b92c062421c3d28f96fee17891 Binary files /dev/null and b/data/dog/dog_0018.jpg differ diff --git a/data/dog/dog_0019.jpg b/data/dog/dog_0019.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dfcbac505052f984c0aba8aed57ae85c5475444d Binary files /dev/null and b/data/dog/dog_0019.jpg differ diff --git a/data/dog/dog_0020.jpg b/data/dog/dog_0020.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6a798a528917c02afed76caab119ff87439bf993 Binary files /dev/null and 
b/data/dog/dog_0020.jpg differ diff --git a/data/dog/dog_0021.jpg b/data/dog/dog_0021.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2456f734a4e93d494a0161cab1aa13ba3ad93d3d Binary files /dev/null and b/data/dog/dog_0021.jpg differ diff --git a/data/dog/dog_0022.jpg b/data/dog/dog_0022.jpg new file mode 100644 index 0000000000000000000000000000000000000000..88095263dc17a0d348c08431b83ea4cae8e3778f Binary files /dev/null and b/data/dog/dog_0022.jpg differ diff --git a/data/dog/dog_0023.jpg b/data/dog/dog_0023.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5874418ed2f1c0b20baf0c2413999c69bc790ec9 Binary files /dev/null and b/data/dog/dog_0023.jpg differ diff --git a/data/dog/dog_0024.jpg b/data/dog/dog_0024.jpg new file mode 100644 index 0000000000000000000000000000000000000000..27491f11de233774dafb21fc56b07e34889e3783 Binary files /dev/null and b/data/dog/dog_0024.jpg differ diff --git a/data/dog/dog_0025.jpg b/data/dog/dog_0025.jpg new file mode 100644 index 0000000000000000000000000000000000000000..483740afecd910803374a7fab996f7e60afd80bd Binary files /dev/null and b/data/dog/dog_0025.jpg differ diff --git a/data/dog/dog_0026.jpg b/data/dog/dog_0026.jpg new file mode 100644 index 0000000000000000000000000000000000000000..83d8389335d81781cb886853689fbe2f8d0c98e3 Binary files /dev/null and b/data/dog/dog_0026.jpg differ diff --git a/data/dog/dog_0027.jpg b/data/dog/dog_0027.jpg new file mode 100644 index 0000000000000000000000000000000000000000..918b71e86d9b8ff081cc7434f9e1f7e5352b9db0 Binary files /dev/null and b/data/dog/dog_0027.jpg differ diff --git a/data/dog/dog_0028.jpg b/data/dog/dog_0028.jpg new file mode 100644 index 0000000000000000000000000000000000000000..71cc116d08394a6f0ac8bda945c405bfa5cbb034 Binary files /dev/null and b/data/dog/dog_0028.jpg differ diff --git a/data/dog/dog_0029.jpg b/data/dog/dog_0029.jpg new file mode 100644 index 
# 0000000000000000000000000000000000000000..7f2a29e139be245b19b6aef2c9a458690f48b7bc Binary files /dev/null and b/data/dog/dog_0029.jpg differ
# diff --git a/data/dog/dog_0030.jpg b/data/dog/dog_0030.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9945deea9c776a6dfe0a2606c63215858a83e6e4 Binary files /dev/null and b/data/dog/dog_0030.jpg differ
# NOTE: the comment lines above/below that start with "diff --git", a blob hash or "+pkg==" are
# patch text that shares these physical lines with the code; they are kept verbatim, as comments.

# ---------------------------------------------------------------------------
# diff --git a/datastore.py b/datastore.py new file mode 100644 index 0000000000000000000000000000000000000000..dadc1ad4433698b38cf3107a6841fc997e7e318a --- /dev/null +++ b/datastore.py @@ -0,0 +1,112 @@
# ---------------------------------------------------------------------------
__import__("pysqlite3")
import sys

# Register the pysqlite3 build under the name ``sqlite3`` *before* chromadb is
# imported, so chromadb picks it up instead of the interpreter's own sqlite3.
# (Presumably because the system SQLite is too old for Chroma -- see Chroma's
# troubleshooting notes.)
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")


import uuid
from collections import defaultdict
from typing import Any, List, Optional

import chromadb
import numpy as np
from chromadb import Collection
from embeddings import Embedding
from PIL.Image import Image

from utils import base64_to_image


class ChromaStore:
    """Small convenience wrapper around a persistent Chroma client.

    It stores image embeddings together with the base64 text of each image and
    returns the decoded images for the nearest neighbours of a query embedding.
    """

    def __init__(
        self,
        collection_name: str,
        storage_path: str = "./chroma",
        database: str = "database",
        metadata: Optional[dict] = None,
    ) -> None:
        """Open (or create) the on-disk Chroma database.

        Args:
        - collection_name (str): name of the collection used by ``create``.
        - storage_path (str): directory handed to ``chromadb.PersistentClient``.
        - database (str): stored on the instance only; nothing in this class
          reads it yet.
        - metadata (dict | None): collection metadata. Available options for
          'hnsw:space' are 'l2', 'ip' or 'cosine'. ``None`` (the default) means
          ``{"hnsw:space": "cosine"}``.
        """
        self.collection_name = collection_name
        # A fresh dict per instance: the previous ``metadata={...}`` default was
        # one shared object, so a mutation through any instance leaked to all.
        self.metadata = (
            {"hnsw:space": "cosine"} if metadata is None else dict(metadata)
        )
        self.storage_path = storage_path
        self.database = database

        self.client = chromadb.PersistentClient(path=self.storage_path)

    def _health_check(self) -> bool:
        """Return True when the client answers ``heartbeat()`` with an int."""
        return isinstance(self.client.heartbeat(), int)

    def generate_embeddings(
        self, images: List[Image], embedding: Embedding
    ) -> np.ndarray:
        """Encode ``images`` with ``embedding.encode_images`` and return the result."""
        return embedding.encode_images(images)

    def create(self) -> Collection:
        """Return the collection named ``self.collection_name``, creating it if needed.

        ``self.metadata`` is forwarded so the configured 'hnsw:space' actually
        applies to a newly created collection (it used to be dropped, leaving
        Chroma's default distance in effect).
        NOTE(review): how Chroma treats this metadata when the collection
        already exists depends on the chromadb version -- confirm for the
        pinned release.
        """
        return self.client.get_or_create_collection(
            name=self.collection_name,
            # An empty dict is turned into None so Chroma's default applies
            # rather than sending an empty metadata mapping.
            metadata=self.metadata or None,
        )

    def add(
        self,
        collection: Collection,
        embeddings: List[float],
        documents: List[str],
        ids: List[str],
    ):
        """Add embeddings, documents to index or collection.

        Args:
        - collection: created collection.
        - embeddings: list of image embeddings
        - documents: list of base64 string of images
        - ids: list of ids for images.

        Raises:
        - RuntimeError (an ``Exception`` subclass, so existing handlers keep
          working) if Chroma rejects the batch; the original error is chained
          as ``__cause__``.
        """
        try:
            collection.add(
                embeddings=embeddings,
                ids=ids,
                documents=documents,
            )
        except Exception as e:
            raise RuntimeError(
                f"Failed to add documents to Chroma store. {e}"
            ) from e

    def query(
        self,
        collection: Collection,
        query_embedding: List[float],
        top_k: int = 3,
    ) -> list:
        """Retrieve relevant images from chroma database.

        Args:
        - collection: created collection.
        - query_embedding: query image embedding.
        - top_k (int): top k images to retrieve.

        Returns:
        - list of ``(image, score)`` tuples for the first query embedding,
          where ``score`` is the distance reported by Chroma rounded to three
          decimals.
        """
        result = collection.query(query_embeddings=query_embedding, n_results=top_k)
        # Chroma returns one result list per query embedding; only the first
        # query's hits are used here.
        documents = result["documents"][0]
        distances = result["distances"][0]
        return [
            (base64_to_image(img_str), round(score, 3))
            for img_str, score in zip(documents, distances)
        ]

    def delete(self, collection_name: str):
        """Delete ``collection_name`` from the database and return True.

        Raises:
        - RuntimeError (an ``Exception`` subclass) with the original error
          chained if the deletion fails.
        """
        try:
            self.client.delete_collection(collection_name)
        except Exception as e:
            raise RuntimeError(f"Failed to delete collection. {e}") from e
        return True

    @staticmethod
    def collection_info(collection: Collection):
        """Return a ``defaultdict(str)`` summarising ``collection``.

        Keys: ``"count"`` (``collection.count()``) and ``"top_10_items"``
        (``collection.peek()`` with its default arguments). Any other key
        yields ``""`` because of the defaultdict.
        """
        info = defaultdict(str)
        info["count"] = collection.count()
        info["top_10_items"] = collection.peek()
        return info


# ---------------------------------------------------------------------------
# diff --git a/embeddings.py b/embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..92af082e748920c36333692b9d57b6bf23a1a338 --- /dev/null +++ b/embeddings.py @@ -0,0 +1,52 @@
# ---------------------------------------------------------------------------
from typing import List, Union

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizer


class Embedding:
    """
    A class for encoding images and text using the CLIP model.

    The processor, tokenizer and model are loaded once, when the class body is
    executed (i.e. at import time), and shared by every classmethod call.
    """

    model_name = "openai/clip-vit-base-patch32"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    processor = CLIPProcessor.from_pretrained(model_name)
    model = CLIPModel.from_pretrained(
        model_name,
        cache_dir="./models",
    )
    tokenizer = CLIPTokenizer.from_pretrained(model_name)
    model.to(device)
    # Inference only: make the evaluation mode explicit.
    model.eval()

    @classmethod
    def encode_image(cls, image: Union["Image.Image", List["Image.Image"]]):
        """
        Encode an image using the specified image processor and model, and return the image features as a numpy array.

        Args:
            image (Image.Image | list[Image.Image]): The input image, or a list
                of images, to be encoded.

        Returns:
            np.ndarray: The encoded image features, one row per input image.
        """
        pixel_values = cls.processor(images=image, return_tensors="pt")[
            "pixel_values"
        ].to(cls.device)
        # No gradients are needed for retrieval; skipping autograd avoids
        # holding the graph in memory for every encoded batch.
        with torch.no_grad():
            features = cls.model.get_image_features(pixel_values=pixel_values)
        return features.cpu().numpy()

    @classmethod
    def encode_images(cls, images: List["Image.Image"]):
        """
        Encode a batch of images; this is the entry point ``ChromaStore.generate_embeddings`` calls.

        Args:
            images (list[Image.Image]): The images to encode.

        Returns:
            np.ndarray: One feature row per image; an empty ``(0, projection_dim)``
            array when ``images`` is empty.
        """
        batch = list(images)
        if not batch:
            # The processor cannot build a batch from nothing, so short-circuit.
            return torch.empty((0, cls.model.config.projection_dim)).numpy()
        return cls.encode_image(batch)

    @classmethod
    def encode_text(cls, text: Union[str, List[str]]):
        """
        Encode the input text using the tokenizer and model, and return the resulting numpy embedding.

        Args:
            text (str | list[str]): The input text (or texts) to be encoded.

        Returns:
            numpy array: The encoded text features, one row per input text.
        """
        # padding lets several texts of different length share one tensor;
        # truncation keeps over-long text within the tokenizer's maximum
        # length instead of failing inside the model.
        inputs = cls.tokenizer(
            text, padding=True, truncation=True, return_tensors="pt"
        ).to(cls.device)
        with torch.no_grad():
            features = cls.model.get_text_features(**inputs)
        return features.cpu().numpy()


# ---------------------------------------------------------------------------
# diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..39426d47ba6c7e104da45d2cad1f15ada2280584 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,153 @@
# +altair==5.3.0 +annotated-types==0.7.0 +anyio==4.4.0 +asgiref==3.8.1 +asttokens==2.4.1 +attrs==24.2.0 +backoff==2.2.1 +bcrypt==4.2.0 +blinker==1.8.2 +build==1.2.1
# +cachetools==5.4.0 +certifi==2024.7.4 +charset-normalizer==3.3.2 +chroma-hnswlib==0.7.6 +chromadb==0.5.5 +click==8.1.7 +coloredlogs==15.0.1 +comm==0.2.2 +debugpy==1.8.5 +decorator==5.1.1
# +Deprecated==1.2.14 +exceptiongroup==1.2.2 +executing==2.0.1 +fastapi==0.112.0 +filelock==3.15.4 +flatbuffers==24.3.25 +fsspec==2024.6.1 +gitdb==4.0.11 +GitPython==3.1.43 +google-auth==2.33.0
# +googleapis-common-protos==1.63.2 +grpcio==1.65.4 +h11==0.14.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.24.5 +humanfriendly==10.0 +idna==3.7 +importlib_metadata==8.0.0
# +importlib_resources==6.4.0 +ipykernel==6.29.5 +ipython==8.26.0 +jedi==0.19.1 +Jinja2==3.1.4 +jsonschema==4.23.0 +jsonschema-specifications==2023.12.1 +jupyter_client==8.6.2 +jupyter_core==5.7.2 +kubernetes==30.1.0
# +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +matplotlib-inline==0.1.7 +mdurl==0.1.2 +mmh3==4.1.0 +monotonic==1.6 +mpmath==1.3.0 +nest-asyncio==1.6.0 +networkx==3.3 +numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==9.1.0.70 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.20.5 +nvidia-nvjitlink-cu12==12.6.20 +nvidia-nvtx-cu12==12.1.105 +oauthlib==3.2.2 +onnxruntime==1.18.1 +opentelemetry-api==1.26.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +opentelemetry-instrumentation==0.47b0 +opentelemetry-instrumentation-asgi==0.47b0 +opentelemetry-instrumentation-fastapi==0.47b0 +opentelemetry-proto==1.26.0 +opentelemetry-sdk==1.26.0 +opentelemetry-semantic-conventions==0.47b0 +opentelemetry-util-http==0.47b0 +orjson==3.10.7 +overrides==7.7.0 +packaging==24.1 +pandas==2.2.2 +parso==0.8.4 +pexpect==4.9.0 +pillow==10.4.0 +platformdirs==4.2.2 +posthog==3.5.0 +prompt_toolkit==3.0.47 +protobuf==4.25.4 +psutil==6.0.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +pyarrow==17.0.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pydantic==2.8.2 +pydantic_core==2.20.1 +pydeck==0.9.1 +Pygments==2.18.0 +PyPika==0.48.9 +pyproject_hooks==1.1.0 +pysqlite3-binary==0.5.3 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.1 +PyYAML==6.0.2 +pyzmq==26.1.0 +referencing==0.35.1 +regex==2024.7.24 +requests==2.32.3 +requests-oauthlib==2.0.0 +rich==13.7.1 +rpds-py==0.20.0 +rsa==4.9 +safetensors==0.4.4 +shellingham==1.5.4 +six==1.16.0 +smmap==5.0.1 +sniffio==1.3.1 +stack-data==0.6.3 +starlette==0.37.2 +streamlit==1.37.1 +sympy==1.13.1 +tenacity==8.5.0 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.1 +torch==2.4.0 +tornado==6.4.1 +tqdm==4.66.5 +traitlets==5.14.3 +transformers==4.44.0 +triton==3.0.0 +typer==0.12.3 +typing_extensions==4.12.2 +tzdata==2024.1 +urllib3==2.2.2 +uvicorn==0.30.5 +uvloop==0.19.0 +watchdog==4.0.1 +watchfiles==0.23.0 +wcwidth==0.2.13 +websocket-client==1.8.0 +websockets==12.0 
# +wrapt==1.16.0 +zipp==3.19.2
# diff --git a/sample_imgs/airplane_0001.jpg b/sample_imgs/airplane_0001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..522ff6a996ab81cfa1719c0346e3b99d9943a4cb Binary files /dev/null and b/sample_imgs/airplane_0001.jpg differ
# diff --git a/sample_imgs/car_0003.jpg b/sample_imgs/car_0003.jpg new file mode 100644 index 0000000000000000000000000000000000000000..21e44b6dd23716f04568b0a6094008f9c45f78d0 Binary files /dev/null and b/sample_imgs/car_0003.jpg differ
# diff --git a/sample_imgs/cat_0008.jpg b/sample_imgs/cat_0008.jpg new file mode 100644 index 0000000000000000000000000000000000000000..72c4db0ab1c9df2eefb6eaede818f0b82ad3fcf1 Binary files /dev/null and b/sample_imgs/cat_0008.jpg differ
# diff --git a/sample_imgs/dog_0017.jpg b/sample_imgs/dog_0017.jpg new file mode 100644 index 0000000000000000000000000000000000000000..697ad24b3cea3994432840a5ea4434084c4f44c4 Binary files /dev/null and b/sample_imgs/dog_0017.jpg differ
# NOTE: the comment lines above and the header below are patch text that shares this physical
# line with the code; they are kept verbatim, as comments.

# ---------------------------------------------------------------------------
# diff --git a/utils.py b/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..bde8959adb2f0ed0ea25b2eb704b6e0af2562562 --- /dev/null +++ b/utils.py @@ -0,0 +1,28 @@
# ---------------------------------------------------------------------------
import base64
import hashlib
import io
import uuid

from PIL import Image

# Modes that Pillow's JPEG writer accepts as-is; every other mode (RGBA, LA,
# P, ...) is converted to RGB before saving. NOTE(review): list taken from
# Pillow's JPEG plugin -- re-check when upgrading Pillow.
_JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def image_to_base64(img: "Image.Image") -> str:
    """Return ``img`` JPEG-encoded as URL-safe (``-``/``_``) base64 text.

    Images whose mode JPEG cannot store are converted to RGB first instead of
    making ``save`` raise. The caller's image object is not modified.
    """
    if img.mode not in _JPEG_MODES:
        img = img.convert("RGB")
    buffer = io.BytesIO()
    img.save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue(), altchars=b"-_").decode("utf-8")


def base64_to_image(base64_str: str) -> "Image.Image":
    """Decode text produced by ``image_to_base64`` back into a PIL image.

    The same ``-``/``_`` alternative alphabet is used for decoding, and the
    image is opened from an in-memory buffer.
    """
    img_data = base64.b64decode(base64_str, altchars=b"-_")
    return Image.open(io.BytesIO(img_data))


def create_ids_for_images(images: list):
    """Generate ids for each image.

    Every element is handled exactly like ``create_id_for_image``; the result
    has one id string per element, in the same order.
    """
    return [create_id_for_image(img) for img in images]


def create_id_for_image(image: "Image.Image"):
    """Return a deterministic UUIDv5 string (URL namespace) for ``image``.

    - ``str`` input is used as the UUID name directly, which yields the same
      ids as before for string inputs.
    - Objects exposing ``tobytes()`` (e.g. a PIL image) are identified by a
      SHA-256 digest of those bytes plus their ``mode``/``size`` attributes
      when present. Passing such an object straight to ``uuid.uuid5`` raised
      ``TypeError`` before.
    - Anything else is handed to ``uuid.uuid5`` unchanged.
    """
    name = image
    if not isinstance(image, str) and hasattr(image, "tobytes"):
        digest = hashlib.sha256(image.tobytes()).hexdigest()
        # mode/size keep two images with identical raw bytes but different
        # geometry apart.
        mode = getattr(image, "mode", "")
        size = getattr(image, "size", "")
        name = f"{mode}|{size}|{digest}"
    return str(uuid.uuid5(uuid.NAMESPACE_URL, name))