from datasets import Dataset, DatasetDict, load_dataset, Features, ClassLabel, Array3D
from huggingface_hub import HfApi
import os
from PIL import Image
import numpy as np
import streamlit as st

# Target image size handed to PIL's ``resize``, i.e. (width, height).
IMG_SIZE = (128, 128)  # Changed from (224, 224) to (128, 128)

# Lower-case file extensions that are treated as images.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")


def load_images_from_folder(folder):
    """Build a ``Dataset`` from a folder containing one sub-folder per class.

    Every sub-directory of *folder* is one class; its name becomes the class
    name and its position in the sorted list of sub-directories becomes the
    integer label. Each file inside a class folder whose name ends in one of
    ``IMAGE_EXTENSIONS`` is opened, converted to RGB, resized to ``IMG_SIZE``
    and stored as a ``uint8`` array.

    Args:
        folder: Path of the directory that holds the class sub-folders.

    Returns:
        A ``Dataset`` with an ``image`` column (``Array3D`` of shape
        height x width x 3, ``uint8``) and a ``label`` column (``ClassLabel``
        whose names are the sorted sub-folder names).

    Raises:
        OSError: If *folder* (or a class sub-folder) cannot be listed.
    """
    # Only sub-directories are classes. Stray files (e.g. ``.DS_Store``) must
    # not become class names, otherwise they shift the integer id of every
    # class that sorts after them.
    label_names = sorted(
        name
        for name in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, name))
    )

    images = []
    labels = []
    for label_id, label in enumerate(label_names):
        label_folder = os.path.join(folder, label)
        for img_file in os.listdir(label_folder):
            img_path = os.path.join(label_folder, img_file)
            if not img_path.lower().endswith(IMAGE_EXTENSIONS):
                continue
            try:
                # The context manager closes the underlying file as soon as
                # the pixel data has been copied into the converted image.
                with Image.open(img_path) as img:
                    resized = img.convert("RGB").resize(IMG_SIZE)
                img_np = np.array(resized, dtype=np.uint8)
            except Exception as e:
                # Deliberately best-effort: report the bad file and keep going.
                print(f"Error processing image {img_path}: {e}")
                continue
            images.append(img_np)
            labels.append(label_id)

    # PIL sizes are (width, height) while the array layout is
    # (height, width, channels); derive the shape so the two stay in sync.
    width, height = IMG_SIZE
    return Dataset.from_dict(
        {"image": images, "label": labels},
        features=Features({
            "image": Array3D(shape=(height, width, 3), dtype="uint8"),
            "label": ClassLabel(names=label_names),
        }),
    )


def main():
    """Render the Streamlit page and, once enabled, upload the dataset.

    The page shows download instructions and then halts at ``st.stop()``.
    After that call is removed, the function loads ``dataset/train_set`` and
    ``dataset/test_set`` and pushes them to the Hugging Face Hub using the
    token read from the ``HF_TOKEN`` environment variable.
    """
    st.title("Upload Cats and Dogs Dataset to Hugging Face Hub")
    st.write("Download the archive of images from [this link](https://drive.google.com/file/d/11rYftkuiAUA4cdejsGEntUfhNfEPTrM8/view?usp=sharing) and extract it to the `dataset` folder.")
    st.warning("This script will not work when run from the HuggingFace Space.")

    # Remove the stop once you have a copy of the dataset.
    # You will need to run this script from your local machine.
    st.stop()

    # Create dataset dictionary
    dataset = DatasetDict({
        "train": load_images_from_folder("dataset/train_set"),
        "test": load_images_from_folder("dataset/test_set"),
    })

    # Push dataset to Hugging Face Hub
    repo_id = "cats_dogs_dataset"  # Choose a dataset name
    HF_TOKEN = os.getenv("HF_TOKEN")
    dataset.push_to_hub(repo_id, token=HF_TOKEN, commit_message="Initial dataset upload")
    st.write(f"Dataset uploaded to {repo_id}")


if __name__ == "__main__":
    main()