|
import streamlit as st |
|
import os |
|
import numpy as np |
|
import pandas as pd |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import plotly.express as px |
|
from tensorflow.keras.preprocessing.image import ImageDataGenerator |
|
from tensorflow.keras.preprocessing import image |
|
|
|
# Page-level Streamlit settings: browser tab title, wide layout, and the
# sidebar opened by default.
st.set_page_config(
    page_title='Fast Food Classification Dataset Analysis',
    layout='wide',
    initial_sidebar_state='expanded',
)
|
|
|
def _first_image_path(class_dir):
    """Return the path of the first entry (sorted by name) inside *class_dir*.

    Sorting makes the choice reproducible; ``os.listdir`` alone returns
    entries in arbitrary order.

    Raises:
        FileNotFoundError: if *class_dir* contains no entries.
    """
    entries = sorted(os.listdir(class_dir))
    if not entries:
        raise FileNotFoundError(f'No files found in {class_dir}')
    # NOTE(review): assumes every entry in a class folder is an image file.
    return os.path.join(class_dir, entries[0])


def _class_counts(class_ids, n_classes):
    """Count samples per class, ordered by class index ``0..n_classes-1``.

    ``value_counts()`` sorts by descending frequency, so the result is
    reindexed to class-index order; classes with no samples get a count of 0.
    """
    return pd.Series(class_ids).value_counts().reindex(range(n_classes), fill_value=0)


def _show_bar_chart(frame, ylabel, title):
    """Draw *frame* as a grouped bar chart (one group per row) in Streamlit."""
    fig, ax = plt.subplots(figsize=(12, 6))
    frame.plot(kind='bar', stacked=False, ax=ax, width=0.8)
    # Use the Axes API explicitly instead of relying on pyplot's "current figure".
    ax.set_xlabel('Class')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    st.pyplot(fig)
    # Release the figure so repeated Streamlit reruns do not accumulate open figures.
    plt.close(fig)


def run():
    """Render the EDA page for the fast food image dataset.

    The page shows, in order: a header, one sample image per class, the
    number of images per class in the train/test/validation splits, and the
    mean RGB value of one sample image per class.

    The dataset is read from ``food/Train``, ``food/Valid`` and ``food/Test``
    located next to this script.
    """
    st.title('EDA on Fast Food Classification')
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')
    st.markdown('---')

    st.write('Dataset : Fast Food Classification')
    st.write('Objective : To create a model that can predict the type of a fast food based on image')
    st.markdown('---')

    batch_size = 256
    img_size = (64, 64)

    script_dir = os.path.dirname(os.path.abspath(__file__))
    train_path = os.path.join(script_dir, 'food', 'Train')
    valid_path = os.path.join(script_dir, 'food', 'Valid')
    test_path = os.path.join(script_dir, 'food', 'Test')

    train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
    valid_datagen = ImageDataGenerator(rescale=1./255)
    test_datagen = ImageDataGenerator(rescale=1./255)

    def flow(datagen, directory):
        # Same iterator settings for every split.
        return datagen.flow_from_directory(
            directory,
            target_size=img_size,
            batch_size=batch_size,
            class_mode='categorical',
        )

    # Only the iterators' metadata (`class_indices`, `classes`) is read below;
    # no image batches are drawn in this function.
    train_generator = flow(train_datagen, train_path)
    valid_generator = flow(valid_datagen, valid_path)
    test_generator = flow(test_datagen, test_path)

    st.write('## Showing Random Samples')

    # Order class names by their integer index so position i <-> class id i.
    class_indices = train_generator.class_indices
    class_names = sorted(class_indices, key=class_indices.get)
    n_classes = len(class_names)

    fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(10, 6), subplot_kw={'xticks': [], 'yticks': []})
    axes = list(ax.flat)
    # zip() stops at the shorter sequence, so fewer than 10 classes no longer
    # raises IndexError; more than 10 classes shows the first 10.
    for axi, class_name in zip(axes, class_names):
        img = plt.imread(_first_image_path(os.path.join(train_path, class_name)))
        axi.imshow(img)
        axi.set_title(class_name)
    for unused in axes[n_classes:]:
        unused.set_visible(False)
    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)

    st.markdown('---')

    st.write('## Balance Classification')

    # Counts must be in class-index order before class names are attached,
    # otherwise the bars are labelled with the wrong class.
    # NOTE(review): assumes the Test/Valid folders use the same class
    # sub-folders as Train, so class ids agree across splits.
    df = pd.concat(
        [
            _class_counts(train_generator.classes, n_classes),
            _class_counts(test_generator.classes, n_classes),
            _class_counts(valid_generator.classes, n_classes),
        ],
        axis=1,
    )
    df.columns = ['Training Data', 'Test Data', 'Validation Data']
    df.index = class_names

    _show_bar_chart(df, 'Data Distribution', 'Data Distribution for each class')

    st.markdown('---')

    st.write('## Mean Pixel Value')

    # Per-channel mean of ONE sample image per class (not of the whole class).
    means = []
    for class_name in class_names:
        img_path = _first_image_path(os.path.join(train_path, class_name))
        img = image.load_img(img_path, target_size=img_size)
        img_array = image.img_to_array(img)
        # Average over height and width, leaving one value per colour channel.
        means.append(np.mean(img_array, axis=(0, 1)))

    means_df = pd.DataFrame(means, columns=['Red', 'Green', 'Blue'])
    means_df.index = class_names

    _show_bar_chart(means_df, 'Mean pixel value', 'Mean pixel value of each channel for each class')

    st.markdown('---')
|
|
|
|
|
|
|
|
|
# Render the page only when this file is executed as the entry script,
# not when it is imported by another module.
if __name__ == "__main__":
    run()