Commit
·
cb4591a
1
Parent(s):
ae47e4d
Update eda.py
Browse files
eda.py
CHANGED
@@ -34,138 +34,7 @@ def run():
|
|
34 |
dataset_path = "andrewsunanda/fast_food_image_classification"
|
35 |
st.write(os.listdir(dataset_path))
|
36 |
|
37 |
-
|
38 |
-
import torch
|
39 |
-
import torchvision.transforms as transforms
|
40 |
-
from torch.utils.data import DataLoader
|
41 |
-
from datasets import load_dataset
|
42 |
-
|
43 |
-
# Define the path to the dataset
|
44 |
-
dataset_path = 'andrewsunanda/fast_food_image_classification'
|
45 |
-
|
46 |
-
# Load the dataset from Hugging Face
|
47 |
-
dataset = load_dataset(dataset_path)
|
48 |
-
|
49 |
-
# Define the batch size and image size
|
50 |
-
batch_size = 256
|
51 |
-
img_size = (64, 64)
|
52 |
-
|
53 |
-
# Define the paths to the train, validation, and test folders
|
54 |
-
train_path = os.path.join(dataset_path, 'Train')
|
55 |
-
valid_path = os.path.join(dataset_path, 'Valid')
|
56 |
-
test_path = os.path.join(dataset_path, 'Test')
|
57 |
-
|
58 |
-
# Define the transforms for the dataset
|
59 |
-
transform = transforms.Compose([
|
60 |
-
transforms.Resize(img_size),
|
61 |
-
transforms.ToTensor(),
|
62 |
-
])
|
63 |
-
|
64 |
-
# Load the training dataset
|
65 |
-
train_dataset = dataset['train']
|
66 |
-
train_dataset = train_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
|
67 |
-
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
|
68 |
-
|
69 |
-
# Load the validation dataset
|
70 |
-
valid_dataset = dataset['validation']
|
71 |
-
valid_dataset = valid_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
|
72 |
-
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
|
73 |
-
|
74 |
-
# Load the testing dataset
|
75 |
-
test_dataset = dataset['test']
|
76 |
-
test_dataset = test_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
|
77 |
-
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
|
78 |
-
|
79 |
-
# Create data generators for training, validation, and testing
|
80 |
-
train_datagen = ImageDataGenerator(
|
81 |
-
rescale=1./255,
|
82 |
-
horizontal_flip=True
|
83 |
-
)
|
84 |
-
|
85 |
-
valid_datagen = ImageDataGenerator(
|
86 |
-
rescale=1./255
|
87 |
-
)
|
88 |
-
test_datagen = ImageDataGenerator(
|
89 |
-
rescale=1./255
|
90 |
-
)
|
91 |
-
|
92 |
-
train_generator = train_datagen.flow_from_directory(
|
93 |
-
train_path,
|
94 |
-
target_size=img_size,
|
95 |
-
batch_size=batch_size,
|
96 |
-
class_mode='categorical'
|
97 |
-
)
|
98 |
-
|
99 |
-
valid_generator = valid_datagen.flow_from_directory(
|
100 |
-
valid_path,
|
101 |
-
target_size=img_size,
|
102 |
-
batch_size=batch_size,
|
103 |
-
class_mode='categorical'
|
104 |
-
)
|
105 |
-
|
106 |
-
test_generator = test_datagen.flow_from_directory(
|
107 |
-
test_path,
|
108 |
-
target_size=img_size,
|
109 |
-
batch_size=batch_size,
|
110 |
-
class_mode='categorical'
|
111 |
-
)
|
112 |
-
|
113 |
-
st.write('## Showing Random Samples')
|
114 |
-
class_names = list(train_generator.class_indices.keys())
|
115 |
-
train_classes = pd.Series(train_generator.classes)
|
116 |
-
test_classes = pd.Series(test_generator.classes)
|
117 |
-
valid_classes = pd.Series(valid_generator.classes)
|
118 |
-
# Plot some samples from each class
|
119 |
-
fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(10, 6), subplot_kw={'xticks': [], 'yticks': []})
|
120 |
-
for i, axi in enumerate(ax.flat):
|
121 |
-
img = plt.imread(f'{train_path}/{class_names[i]}/{os.listdir(train_path+"/"+class_names[i])[0]}')
|
122 |
-
axi.imshow(img)
|
123 |
-
axi.set_title(class_names[i])
|
124 |
-
plt.tight_layout()
|
125 |
-
st.pyplot(fig)
|
126 |
-
|
127 |
-
|
128 |
-
st.markdown('---')
|
129 |
-
|
130 |
-
st.write('## Balance Classification')
|
131 |
-
|
132 |
-
# Create a pandas dataframe to show the distribution of classes in train, test, and validation data
|
133 |
-
df = pd.concat([train_classes.value_counts(), test_classes.value_counts(), valid_classes.value_counts()], axis=1)
|
134 |
-
df.columns = ['Training Data', 'Test Data', 'Validation Data']
|
135 |
-
df.index = class_names
|
136 |
-
|
137 |
-
fig, ax = plt.subplots(figsize=(12, 6))
|
138 |
-
df.plot(kind='bar', stacked=False, ax=ax, width=0.8)
|
139 |
-
plt.xlabel('Class')
|
140 |
-
plt.ylabel('Data Distribution')
|
141 |
-
plt.title('Data Distribution for each class')
|
142 |
-
plt.xticks(rotation=45, ha='right')
|
143 |
-
st.pyplot(fig)
|
144 |
-
|
145 |
-
|
146 |
-
st.markdown('---')
|
147 |
-
|
148 |
-
st.write('## Mean Pixel Value')
|
149 |
-
|
150 |
-
# Plot the mean of pixel mean of each channel for each class (unstacked bar chart)
|
151 |
-
means = []
|
152 |
-
for i in range(len(class_names)):
|
153 |
-
class_name = class_names[i]
|
154 |
-
img_path = os.path.join(train_path, class_name, os.listdir(os.path.join(train_path, class_name))[0])
|
155 |
-
img = image.load_img(img_path, target_size=img_size)
|
156 |
-
img_array = image.img_to_array(img)
|
157 |
-
means.append(np.mean(img_array, axis=(0, 1)))
|
158 |
-
means_df = pd.DataFrame(means, columns=['Red', 'Green', 'Blue'])
|
159 |
-
means_df.index = class_names
|
160 |
-
fig, ax = plt.subplots(figsize=(12, 6))
|
161 |
-
means_df.plot(kind='bar', stacked=False, ax=ax, width=0.8)
|
162 |
-
plt.xlabel('Class')
|
163 |
-
plt.ylabel('Mean pixel value')
|
164 |
-
plt.title('Mean pixel value of each channel for each class')
|
165 |
-
plt.xticks(rotation=45, ha='right')
|
166 |
-
st.pyplot(fig)
|
167 |
-
|
168 |
-
st.markdown('---')
|
169 |
|
170 |
|
171 |
|
|
|
34 |
dataset_path = "andrewsunanda/fast_food_image_classification"
|
35 |
st.write(os.listdir(dataset_path))
|
36 |
|
37 |
+
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
|
40 |
|