Spaces:

andrewsunanda
/

fast_food_classification

Sleeping

App Files Files Community

fast_food_classification / eda.py

andrewsunanda

Update eda.py

d5063eb about 2 years ago

raw

history blame contribute delete

4.33 kB

	import streamlit as st
	import os
	import numpy as np
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import plotly.express as px
	from tensorflow.keras.preprocessing.image import ImageDataGenerator
	from tensorflow.keras.preprocessing import image

	# Page-level Streamlit settings: browser-tab title, full-width layout and a
	# sidebar that starts expanded.
	st.set_page_config(
	    page_title='Fast Food Classification Dataset Analysis',
	    layout='wide',
	    initial_sidebar_state='expanded',
	)

	def _class_counts(labels, n_classes):
	    """Return per-class sample counts ordered by class index ``0..n_classes-1``.

	    ``value_counts`` orders its result by frequency, so the counts are
	    re-indexed by class index before they are labelled; classes that do not
	    occur in ``labels`` get a count of 0.
	    """
	    counts = pd.Series(labels).value_counts()
	    return counts.reindex(range(n_classes), fill_value=0)


	def _first_image_path(class_dir):
	    """Return the path of the first (name-sorted, non-hidden) entry of ``class_dir``.

	    ``os.listdir`` returns entries in arbitrary order, so the names are sorted
	    to make the chosen sample reproducible. Raises ``IndexError`` when the
	    directory has no visible entries.
	    """
	    visible = sorted(
	        entry for entry in os.listdir(class_dir) if not entry.startswith('.')
	    )
	    return os.path.join(class_dir, visible[0])


	def _show_bar_chart(frame, ylabel, title):
	    """Render ``frame`` as a grouped bar chart in Streamlit, then free the figure."""
	    fig, ax = plt.subplots(figsize=(12, 6))
	    frame.plot(kind='bar', stacked=False, ax=ax, width=0.8)
	    ax.set_xlabel('Class')
	    ax.set_ylabel(ylabel)
	    ax.set_title(title)
	    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
	    st.pyplot(fig)
	    # pyplot keeps every figure alive until it is closed; close it so that
	    # repeated Streamlit reruns do not accumulate figures in memory.
	    plt.close(fig)


	def run():
	    """Render the EDA page for the Fast Food Classification dataset.

	    The page shows, in order: a title and description, one sample image per
	    class from the training folder, the number of images per class in the
	    Train/Test/Valid folders, and the mean pixel value per colour channel of
	    the sample image of each class. Images are read from the ``food``
	    directory located next to this script.
	    """
	    # Page title
	    st.title('EDA on Fast Food Classification')

	    # Author line
	    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')

	    st.markdown('---')

	    st.write('Dataset : Fast Food Classification')
	    st.write('Objective : To create a model that can predict the type of a fast food based on image')

	    st.markdown('---')

	    # Batch size and image size used by the directory iterators
	    batch_size = 256
	    img_size = (64, 64)

	    # Paths to the data folders, resolved relative to this script
	    script_dir = os.path.dirname(os.path.abspath(__file__))
	    train_path = os.path.join(script_dir, 'food', 'Train')
	    valid_path = os.path.join(script_dir, 'food', 'Valid')
	    test_path = os.path.join(script_dir, 'food', 'Test')

	    # Data generators for training, validation and testing. On this page they
	    # are only used to read the class names and the per-image class labels.
	    train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
	    valid_datagen = ImageDataGenerator(rescale=1./255)
	    test_datagen = ImageDataGenerator(rescale=1./255)

	    flow_options = {
	        'target_size': img_size,
	        'batch_size': batch_size,
	        'class_mode': 'categorical',
	    }
	    train_generator = train_datagen.flow_from_directory(train_path, **flow_options)
	    valid_generator = valid_datagen.flow_from_directory(valid_path, **flow_options)
	    test_generator = test_datagen.flow_from_directory(test_path, **flow_options)

	    # Class names ordered by their class index, so that position i in
	    # class_names corresponds to label i in generator.classes.
	    class_indices = train_generator.class_indices
	    class_names = sorted(class_indices, key=class_indices.get)
	    n_classes = len(class_names)

	    st.write('## Showing Random Samples')

	    # One sample image per class (the first file of each class folder)
	    sample_paths = [
	        _first_image_path(os.path.join(train_path, name)) for name in class_names
	    ]
	    fig, axes = plt.subplots(
	        nrows=2, ncols=5, figsize=(10, 6),
	        subplot_kw={'xticks': [], 'yticks': []},
	    )
	    grid = list(axes.flat)
	    # zip stops at the shorter sequence, so fewer classes than grid cells no
	    # longer raises IndexError.
	    for axi, name, path in zip(grid, class_names, sample_paths):
	        axi.imshow(plt.imread(path))
	        axi.set_title(name)
	    # Hide grid cells that have no class to show
	    for axi in grid[n_classes:]:
	        axi.set_visible(False)
	    fig.tight_layout()
	    st.pyplot(fig)
	    plt.close(fig)

	    st.markdown('---')

	    st.write('## Balance Classification')

	    # Distribution of classes in train, test and validation data. Counts are
	    # aligned by class index before the class names are attached.
	    # NOTE(review): assumes the Valid and Test folders contain the same class
	    # sub-folders as Train, so that label i means the same class in all three.
	    df = pd.DataFrame({
	        'Training Data': _class_counts(train_generator.classes, n_classes),
	        'Test Data': _class_counts(test_generator.classes, n_classes),
	        'Validation Data': _class_counts(valid_generator.classes, n_classes),
	    })
	    df.index = class_names
	    _show_bar_chart(df, 'Data Distribution', 'Data Distribution for each class')

	    st.markdown('---')

	    st.write('## Mean Pixel Value')

	    # Mean pixel value of each channel, computed on the sample image of each
	    # class after resizing it to img_size (unstacked bar chart).
	    means = []
	    for path in sample_paths:
	        img = image.load_img(path, target_size=img_size)
	        means.append(np.mean(image.img_to_array(img), axis=(0, 1)))
	    means_df = pd.DataFrame(means, columns=['Red', 'Green', 'Blue'])
	    means_df.index = class_names
	    _show_bar_chart(
	        means_df, 'Mean pixel value',
	        'Mean pixel value of each channel for each class',
	    )

	    st.markdown('---')




	# Render the page only when this file is executed as the main script;
	# importing the module does not call run().
	if __name__ == '__main__':
	    run()