Spaces:

andrewsunanda
/

fast_food_classification

Sleeping

App Files Files Community

fast_food_classification / eda.py

andrewsunanda

Update eda.py

4045f11 about 2 years ago

raw

history blame

5.57 kB

	import streamlit as st
	import os
	import numpy as np
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import plotly.express as px
	from tensorflow.keras.preprocessing.image import ImageDataGenerator
	from tensorflow.keras.preprocessing import image

	# Page-wide Streamlit settings: browser-tab title, wide layout, and a
	# sidebar that starts expanded.
	st.set_page_config(
	    page_title='Fast Food Classification Dataset Analysis',
	    layout='wide',
	    initial_sidebar_state='expanded',
	)

	def run():
	    """Render the EDA page for the fast food classification dataset.

	    Writes, in order, to the Streamlit page: a header, a 2x5 grid showing
	    the first listed training image for up to ten classes, a bar chart of
	    per-class image counts for the train/test/validation splits, and a bar
	    chart of per-channel mean pixel values. Nothing is returned.
	    """
	    # Page title
	    st.title('EDA on Fast Food Classification')

	    # Author line
	    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')

	    st.markdown('---')

	    st.write('Dataset : Fast Food Classification')
	    st.write('Objective : To create a model that can predict the type of a fast food based on image')

	    st.markdown('---')

	    # Function-scope imports: only the loading code below needs them.
	    import torch
	    import torchvision.transforms as transforms
	    from torch.utils.data import DataLoader
	    from datasets import load_dataset

	    # Hugging Face Hub id of the dataset.
	    dataset_path = 'andrewsunanda/fast_food_image_classification'
	    dataset = load_dataset(dataset_path)

	    batch_size = 256
	    img_size = (64, 64)

	    # NOTE(review): these join the Hub id as if it were a local directory.
	    # flow_from_directory and os.listdir below read the local filesystem, so
	    # a copy of the images must exist at this relative path - confirm that it
	    # does in the deployment.
	    train_path = os.path.join(dataset_path, 'Train')
	    valid_path = os.path.join(dataset_path, 'Valid')
	    test_path = os.path.join(dataset_path, 'Test')

	    transform = transforms.Compose([
	        transforms.Resize(img_size),
	        transforms.ToTensor(),
	    ])

	    def to_tensor_example(example):
	        """Resize the example's image, convert it to a tensor, keep its label."""
	        return {'image': transform(example['image']), 'label': example['label']}

	    # NOTE(review): none of these three loaders is read again in this
	    # function; they are kept so the loading behaviour stays unchanged.
	    train_loader = DataLoader(dataset['train'].map(to_tensor_example), batch_size=batch_size, shuffle=True)
	    valid_loader = DataLoader(dataset['validation'].map(to_tensor_example), batch_size=batch_size, shuffle=False)
	    test_loader = DataLoader(dataset['test'].map(to_tensor_example), batch_size=batch_size, shuffle=False)

	    # Keras directory generators; below they are used only for their class
	    # metadata (class_indices / classes).
	    train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
	    valid_datagen = ImageDataGenerator(rescale=1./255)
	    test_datagen = ImageDataGenerator(rescale=1./255)

	    train_generator = train_datagen.flow_from_directory(
	        train_path,
	        target_size=img_size,
	        batch_size=batch_size,
	        class_mode='categorical',
	    )
	    valid_generator = valid_datagen.flow_from_directory(
	        valid_path,
	        target_size=img_size,
	        batch_size=batch_size,
	        class_mode='categorical',
	    )
	    test_generator = test_datagen.flow_from_directory(
	        test_path,
	        target_size=img_size,
	        batch_size=batch_size,
	        class_mode='categorical',
	    )

	    st.write('## Showing Random Samples')

	    # Order the names by class index so that position i in class_names is
	    # the class labelled i in each generator's `classes` array.
	    class_indices = train_generator.class_indices
	    class_names = sorted(class_indices, key=class_indices.get)

	    fig, ax = plt.subplots(nrows=2, ncols=5, figsize=(10, 6), subplot_kw={'xticks': [], 'yticks': []})
	    # zip stops at the shorter input, so fewer than ten classes leaves the
	    # spare axes empty instead of raising IndexError.
	    for axi, class_name in zip(ax.flat, class_names):
	        class_dir = os.path.join(train_path, class_name)
	        # First entry returned by os.listdir, whose order is unspecified.
	        img = plt.imread(os.path.join(class_dir, os.listdir(class_dir)[0]))
	        axi.imshow(img)
	        axi.set_title(class_name)
	    fig.tight_layout()
	    st.pyplot(fig)
	    # Release the figure; pyplot otherwise keeps it registered across reruns.
	    plt.close(fig)

	    st.markdown('---')

	    st.write('## Balance Classification')

	    # value_counts() orders by descending frequency, so every column is
	    # re-indexed by class index before the class names are attached;
	    # otherwise counts would be labelled with the wrong class. Classes
	    # missing from a split are reported as 0.
	    # Assumes the validation and test folders use the same class indices as
	    # the training folder - TODO confirm.
	    class_ids = range(len(class_names))
	    split_labels = {
	        'Training Data': train_generator.classes,
	        'Test Data': test_generator.classes,
	        'Validation Data': valid_generator.classes,
	    }
	    df = pd.DataFrame({
	        column: pd.Series(labels).value_counts().reindex(class_ids, fill_value=0)
	        for column, labels in split_labels.items()
	    })
	    df.index = class_names

	    fig, ax = plt.subplots(figsize=(12, 6))
	    df.plot(kind='bar', stacked=False, ax=ax, width=0.8)
	    ax.set_xlabel('Class')
	    ax.set_ylabel('Data Distribution')
	    ax.set_title('Data Distribution for each class')
	    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
	    st.pyplot(fig)
	    plt.close(fig)

	    st.markdown('---')

	    st.write('## Mean Pixel Value')

	    # NOTE(review): each class is represented by a single image (the first
	    # directory entry), not by an average over all of its images.
	    means = []
	    for class_name in class_names:
	        class_dir = os.path.join(train_path, class_name)
	        img_path = os.path.join(class_dir, os.listdir(class_dir)[0])
	        img = image.load_img(img_path, target_size=img_size)
	        img_array = image.img_to_array(img)
	        # Average over the two spatial axes, leaving one value per channel.
	        means.append(np.mean(img_array, axis=(0, 1)))
	    means_df = pd.DataFrame(means, columns=['Red', 'Green', 'Blue'])
	    means_df.index = class_names

	    fig, ax = plt.subplots(figsize=(12, 6))
	    means_df.plot(kind='bar', stacked=False, ax=ax, width=0.8)
	    ax.set_xlabel('Class')
	    ax.set_ylabel('Mean pixel value')
	    ax.set_title('Mean pixel value of each channel for each class')
	    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
	    st.pyplot(fig)
	    plt.close(fig)

	    st.markdown('---')




	# Render the page only when this file is executed directly, not when it
	# is imported by another module.
	if __name__ == '__main__':
	    run()