from datasets import Dataset, DatasetDict, load_dataset, Features, ClassLabel, Array3D
from huggingface_hub import HfApi
import os
from PIL import Image
import numpy as np
import streamlit as st

# Define the target image size
IMG_SIZE = (128, 128)  # All images are resized to 128x128 before being stored

def load_images_from_folder(folder):
    """Load images from `folder`, where each subdirectory is one class label."""
    images = []
    labels = []
    # Each subdirectory name becomes a class name (e.g. "cats", "dogs")
    label_names = sorted(
        d for d in os.listdir(folder) if os.path.isdir(os.path.join(folder, d))
    )
    for i, label in enumerate(label_names):
        label_folder = os.path.join(folder, label)
        for img_file in os.listdir(label_folder):
            img_path = os.path.join(label_folder, img_file)
            if img_path.lower().endswith((".png", ".jpg", ".jpeg")):
                try:
                    img = Image.open(img_path).convert("RGB")
                    img = img.resize(IMG_SIZE)  # Resize to 128x128
                    img_np = np.array(img, dtype=np.uint8)  # Convert to a NumPy array
                    images.append(img_np)
                    labels.append(i)
                except Exception as e:
                    print(f"Error processing image {img_path}: {e}")
                    continue  # Skip problematic images
    return Dataset.from_dict(
        {"image": images, "label": labels},
        features=Features({
            "image": Array3D(shape=(128, 128, 3), dtype="uint8"),  # 128x128 RGB images
            "label": ClassLabel(names=label_names)
        })
    )

def main():
    st.title("Upload Cats and Dogs Dataset to Hugging Face Hub")
    st.write("Download the archive of images from [this link](https://drive.google.com/file/d/11rYftkuiAUA4cdejsGEntUfhNfEPTrM8/view?usp=sharing) and extract it to the `dataset` folder.")
    st.warning("This script will not work when run from the HuggingFace Space.")
    # Remove the st.stop() call below once you have a local copy of the dataset;
    # you will need to run this script from your local machine.
    st.stop()

    # Create a dataset dictionary with train and test splits
    dataset = DatasetDict({
        "train": load_images_from_folder("dataset/train_set"),
        "test": load_images_from_folder("dataset/test_set")
    })

    # Push the dataset to the Hugging Face Hub
    repo_id = "cats_dogs_dataset"  # Choose a dataset name
    HF_TOKEN = os.getenv("HF_TOKEN")
    dataset.push_to_hub(repo_id, token=HF_TOKEN, commit_message="Initial dataset upload")
    st.write(f"Dataset uploaded to {repo_id}")
if __name__ == "__main__":
    main()