louiecerv committed on
Commit 9321c59 · 1 Parent(s): 961e0f0

sync to remote

Files changed (2)
  1. app.py +67 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,67 @@
+ from datasets import Dataset, DatasetDict, load_dataset, Features, ClassLabel, Array3D
+ from huggingface_hub import HfApi
+ import os
+ from PIL import Image
+ import numpy as np
+ import streamlit as st
+
+ # Define the target image size
+ IMG_SIZE = (128, 128)  # Change from (224, 224) to (128, 128)
+
+ def load_images_from_folder(folder):
+     images = []
+     labels = []
+     label_names = sorted(os.listdir(folder))
+
+     for i, label in enumerate(label_names):
+         label_folder = os.path.join(folder, label)
+         if os.path.isdir(label_folder):
+             for img_file in os.listdir(label_folder):
+                 img_path = os.path.join(label_folder, img_file)
+                 if img_path.lower().endswith((".png", ".jpg", ".jpeg")):
+                     try:
+                         img = Image.open(img_path).convert("RGB")
+                         img = img.resize(IMG_SIZE)  # Resize to 128x128
+                         img_np = np.array(img, dtype=np.uint8)  # Convert to NumPy array
+
+                         images.append(img_np)
+                         labels.append(i)
+
+                     except Exception as e:
+                         print(f"Error processing image {img_path}: {e}")
+                         continue  # Skip problematic images
+
+     return Dataset.from_dict(
+         {"image": images, "label": labels},
+         features=Features({
+             "image": Array3D(shape=(128, 128, 3), dtype="uint8"),  # Update shape to (128, 128, 3)
+             "label": ClassLabel(names=label_names)
+         })
+     )
+
+ def main():
+
+     st.title("Upload Cats and Dogs Dataset to Hugging Face Hub")
+     st.write("Download the archive of images from [this link](https://drive.google.com/uc?id=1Jq0tQqX5u3J8QfQqJ6f8v3v9XJ8h6Z1n) and extract it to the `dataset` folder.")
+     st.warning("This script will not work when run from the HuggingFace Space.")
+
+     # Remove the stop once you have a copy of the dataset;
+     # you will need to run this script from your local machine.
+     st.stop()
+
+     # Create dataset dictionary
+     dataset = DatasetDict({
+         "train": load_images_from_folder("dataset/train_set"),
+         "test": load_images_from_folder("dataset/test_set")
+     })
+
+     # Push dataset to Hugging Face Hub
+     repo_id = "cats_dogs_dataset"  # Choose a dataset name
+     HF_TOKEN = os.getenv("HF_TOKEN")
+
+     dataset.push_to_hub(repo_id, token=HF_TOKEN, commit_message="Initial dataset upload")
+
+     st.write(f"Dataset uploaded to {repo_id}")
+
+ if __name__ == "__main__":
+     main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ datasets
+ huggingface_hub
+ Pillow
+ pyarrow
+ numpy
+ matplotlib
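
For reference, here is a minimal sketch (not part of this commit) of how the pushed dataset could be loaded back to verify the upload. The repo id `username/cats_dogs_dataset` is an assumption: pushing with a bare name creates the dataset under the namespace of the account that owns `HF_TOKEN`, so replace `username` with that account name.

from datasets import load_dataset

# Hypothetical repo id; substitute the actual namespace of the uploading account.
ds = load_dataset("username/cats_dogs_dataset")
print(ds)                      # DatasetDict with "train" and "test" splits
print(ds["train"].features)    # Array3D (128, 128, 3) images and a ClassLabel label
sample = ds["train"][0]        # one example: {"image": 128x128x3 pixel data, "label": int}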