andrewsunanda committed on
Commit
cb4591a
·
1 Parent(s): ae47e4d

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +1 -132
eda.py CHANGED
@@ -34,138 +34,7 @@ def run():
34
  dataset_path = "andrewsunanda/fast_food_image_classification"
35
  st.write(os.listdir(dataset_path))
36
 
37
- import os
38
- import torch
39
- import torchvision.transforms as transforms
40
- from torch.utils.data import DataLoader
41
- from datasets import load_dataset
42
-
43
- # Define the path to the dataset
44
- dataset_path = 'andrewsunanda/fast_food_image_classification'
45
-
46
- # Load the dataset from Hugging Face
47
- dataset = load_dataset(dataset_path)
48
-
49
- # Define the batch size and image size
50
- batch_size = 256
51
- img_size = (64, 64)
52
-
53
- # Define the paths to the train, validation, and test folders
54
- train_path = os.path.join(dataset_path, 'Train')
55
- valid_path = os.path.join(dataset_path, 'Valid')
56
- test_path = os.path.join(dataset_path, 'Test')
57
-
58
- # Define the transforms for the dataset
59
- transform = transforms.Compose([
60
- transforms.Resize(img_size),
61
- transforms.ToTensor(),
62
- ])
63
-
64
- # Load the training dataset
65
- train_dataset = dataset['train']
66
- train_dataset = train_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
67
- train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
68
-
69
- # Load the validation dataset
70
- valid_dataset = dataset['validation']
71
- valid_dataset = valid_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
72
- valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
73
-
74
- # Load the testing dataset
75
- test_dataset = dataset['test']
76
- test_dataset = test_dataset.map(lambda x: {'image': transform(x['image']), 'label': x['label']})
77
- test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
78
-
79
- # Create data generators for training, validation, and testing
80
- train_datagen = ImageDataGenerator(
81
- rescale=1./255,
82
- horizontal_flip=True
83
- )
84
-
85
- valid_datagen = ImageDataGenerator(
86
- rescale=1./255
87
- )
88
- test_datagen = ImageDataGenerator(
89
- rescale=1./255
90
- )
91
-
92
- train_generator = train_datagen.flow_from_directory(
93
- train_path,
94
- target_size=img_size,
95
- batch_size=batch_size,
96
- class_mode='categorical'
97
- )
98
-
99
- valid_generator = valid_datagen.flow_from_directory(
100
- valid_path,
101
- target_size=img_size,
102
- batch_size=batch_size,
103
- class_mode='categorical'
104
- )
105
-
106
- test_generator = test_datagen.flow_from_directory(
107
- test_path,
108
- target_size=img_size,
109
- batch_size=batch_size,
110
- class_mode='categorical'
111
- )
112
-
113
- st.write('## Showing Random Samples')
114
- class_names = list(train_generator.class_indices.keys())
115
- train_classes = pd.Series(train_generator.classes)
116
- test_classes = pd.Series(test_generator.classes)
117
- valid_classes = pd.Series(valid_generator.classes)
118
- # Plot some samples from each class
119
- fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(10, 6), subplot_kw={'xticks': [], 'yticks': []})
120
- for i, axi in enumerate(ax.flat):
121
- img = plt.imread(f'{train_path}/{class_names[i]}/{os.listdir(train_path+"/"+class_names[i])[0]}')
122
- axi.imshow(img)
123
- axi.set_title(class_names[i])
124
- plt.tight_layout()
125
- st.pyplot(fig)
126
-
127
-
128
- st.markdown('---')
129
-
130
- st.write('## Balance Classification')
131
-
132
- # Create a pandas dataframe to show the distribution of classes in train, test, and validation data
133
- df = pd.concat([train_classes.value_counts(), test_classes.value_counts(), valid_classes.value_counts()], axis=1)
134
- df.columns = ['Training Data', 'Test Data', 'Validation Data']
135
- df.index = class_names
136
-
137
- fig, ax = plt.subplots(figsize=(12, 6))
138
- df.plot(kind='bar', stacked=False, ax=ax, width=0.8)
139
- plt.xlabel('Class')
140
- plt.ylabel('Data Distribution')
141
- plt.title('Data Distribution for each class')
142
- plt.xticks(rotation=45, ha='right')
143
- st.pyplot(fig)
144
-
145
-
146
- st.markdown('---')
147
-
148
- st.write('## Mean Pixel Value')
149
-
150
- # Plot the mean of pixel mean of each channel for each class (unstacked bar chart)
151
- means = []
152
- for i in range(len(class_names)):
153
- class_name = class_names[i]
154
- img_path = os.path.join(train_path, class_name, os.listdir(os.path.join(train_path, class_name))[0])
155
- img = image.load_img(img_path, target_size=img_size)
156
- img_array = image.img_to_array(img)
157
- means.append(np.mean(img_array, axis=(0, 1)))
158
- means_df = pd.DataFrame(means, columns=['Red', 'Green', 'Blue'])
159
- means_df.index = class_names
160
- fig, ax = plt.subplots(figsize=(12, 6))
161
- means_df.plot(kind='bar', stacked=False, ax=ax, width=0.8)
162
- plt.xlabel('Class')
163
- plt.ylabel('Mean pixel value')
164
- plt.title('Mean pixel value of each channel for each class')
165
- plt.xticks(rotation=45, ha='right')
166
- st.pyplot(fig)
167
-
168
- st.markdown('---')
169
 
170
 
171
 
 
34
  dataset_path = "andrewsunanda/fast_food_image_classification"
35
  st.write(os.listdir(dataset_path))
36
 
37
+ #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40