from datasets import Dataset, DatasetDict, load_dataset, Features, ClassLabel, Array3D
from huggingface_hub import HfApi
import os
from PIL import Image
import numpy as np
import streamlit as st

# Define the target image size (width, height) used when resizing
IMG_SIZE = (128, 128)


def load_images_from_folder(folder):
    images = []
    labels = []
    # Each subfolder name is treated as a class label
    label_names = sorted(os.listdir(folder))
    for i, label in enumerate(label_names):
        label_folder = os.path.join(folder, label)
        if os.path.isdir(label_folder):
            for img_file in os.listdir(label_folder):
                img_path = os.path.join(label_folder, img_file)
                if img_path.lower().endswith((".png", ".jpg", ".jpeg")):
                    try:
                        img = Image.open(img_path).convert("RGB")
                        img = img.resize(IMG_SIZE)  # Resize to 128x128
                        img_np = np.array(img, dtype=np.uint8)  # Convert to a NumPy array
                        images.append(img_np)
                        labels.append(i)
                    except Exception as e:
                        print(f"Error processing image {img_path}: {e}")
                        continue  # Skip problematic images
    return Dataset.from_dict(
        {"image": images, "label": labels},
        features=Features({
            "image": Array3D(shape=(128, 128, 3), dtype="uint8"),  # (height, width, channels), matches IMG_SIZE
            "label": ClassLabel(names=label_names)
        })
    )

def main():
    st.title("Upload Cats and Dogs Dataset to Hugging Face Hub")
    st.write("Download the archive of images from [this link](https://drive.google.com/file/d/11rYftkuiAUA4cdejsGEntUfhNfEPTrM8/view?usp=sharing) and extract it to the `dataset` folder.")
    st.warning("This script will not work when run from the HuggingFace Space.")
    # Remove the stop below once you have a local copy of the dataset;
    # you will need to run this script from your local machine.
    st.stop()

    # Build a DatasetDict with train and test splits
    dataset = DatasetDict({
        "train": load_images_from_folder("dataset/train_set"),
        "test": load_images_from_folder("dataset/test_set")
    })

    # Push the dataset to the Hugging Face Hub
    repo_id = "cats_dogs_dataset"  # Choose a dataset name
    HF_TOKEN = os.getenv("HF_TOKEN")
    dataset.push_to_hub(repo_id, token=HF_TOKEN, commit_message="Initial dataset upload")
    st.write(f"Dataset uploaded to {repo_id}")


if __name__ == "__main__":
    main()
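
# ---------------------------------------------------------------------------
# Sketch for verifying the upload (not part of the original script, kept as
# comments so it does not affect execution). Calling push_to_hub with a bare
# repo id uploads under the authenticated account; "your-username" below is a
# placeholder for that namespace.
#
#   from datasets import load_dataset
#   ds = load_dataset("your-username/cats_dogs_dataset")
#   print(ds)                       # DatasetDict with "train" and "test" splits
#   print(ds["train"].features)     # Array3D(128, 128, 3) images and ClassLabel labels
#   print(ds["train"][0]["label"])  # integer class index
# ---------------------------------------------------------------------------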