|
import streamlit as st |
|
import numpy as np |
|
import pandas as pd |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import plotly.express as px |
|
|
|
# Page-level settings: browser tab title, full-width layout, and the sidebar
# opened on first load.
st.set_page_config(
    page_title='Wine Quality Dataset Analysis',
    layout='wide',
    initial_sidebar_state='expanded',
)
|
|
|
def _load_wine_data(path='winequalityN.csv'):
    """Load the wine-quality CSV and return a cleaned DataFrame.

    The six column names listed below are renamed from space-separated to
    snake_case, then duplicate rows and rows containing any missing value
    are dropped.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    data = pd.read_csv(path)
    data = data.rename(columns={
        'fixed acidity': 'fixed_acidity',
        'volatile acidity': 'volatile_acidity',
        'citric acid': 'citric_acid',
        'residual sugar': 'residual_sugar',
        'free sulfur dioxide': 'free_sulfur_dioxide',
        'total sulfur dioxide': 'total_sulfur_dioxide',
    })
    return data.drop_duplicates().dropna(axis=0)


def _quality_correlation(data, wine_type):
    """Return each numeric column's correlation with ``quality`` for one wine type.

    Args:
        data: Cleaned wine DataFrame containing ``type`` and ``quality`` columns.
        wine_type: Value of the ``type`` column to filter on (e.g. ``'red'``).

    Returns:
        A ``pandas.Series`` indexed by column name.
    """
    # Keep numeric columns only: pandas 2.x no longer silently drops
    # non-numeric columns in corrwith, so the string 'type' column would
    # otherwise make the call fail.
    subset = data[data['type'] == wine_type].select_dtypes(include='number')
    return subset.corrwith(subset['quality'])


def _show_figure(fig):
    """Render a Matplotlib figure in Streamlit, then close it.

    Streamlit re-executes the script on every interaction, so figures that
    are never closed would pile up in pyplot's global registry.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the Wine Quality exploratory-data-analysis page."""
    st.title('EDA Dataset Wine Quality')
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')
    st.markdown('---')

    st.write('Dataset : Wine Quality Data Set')
    st.write('Objective : Dapat memprediksi grade quality sebuah Wine')
    st.write('Evaluasi nya menggunakan Accuracy Score')
    st.markdown('---')

    # --- Raw (cleaned) dataset -------------------------------------------
    st.write('## Dataset')
    data = _load_wine_data()
    st.dataframe(data)
    st.markdown('---')

    # --- Class balance of the target ---------------------------------------
    st.write('## Checking Balance / Imbalance')
    fig, ax = plt.subplots()
    data['quality'].value_counts().plot(kind='bar', color='blue', ax=ax)
    ax.set_title('Perbandingan Jumlah Quality Wine')
    ax.set_xlabel('Quality')
    ax.set_ylabel('Count')
    _show_figure(fig)
    st.markdown('---')

    # --- Per-type correlation of every feature with quality ----------------
    st.write('## Each Features Correlation with Wine Quality')
    for wine_type, title in (('red', 'Red Wine'), ('white', 'White Wine')):
        correlation = _quality_correlation(data, wine_type)
        fig, ax = plt.subplots()
        sns.heatmap(correlation.to_frame(), cmap='RdYlGn', annot=True,
                    vmin=-1, vmax=1, ax=ax)
        ax.set_title(title)
        _show_figure(fig)

    st.write('Semakin tinggi level sulphates dan citric acid dalam red wine akan menghasilkan red wine yang semakin bagus')
    st.write('Sebaliknya, sulphates dan citric acid tidak terlalu berpengaruh pada white wine, tetapi level density yang semakin rendah akan menghasilkan white wine yang baik')
    st.markdown('---')

    # --- Alcohol vs volatile acidity, coloured by quality ------------------
    st.write('## Alcohol and Volatile Acidity')
    fig, ax = plt.subplots()
    sns.scatterplot(x='alcohol', y='volatile_acidity', data=data,
                    hue='quality', palette='coolwarm', ax=ax)
    ax.set_title('Perbandingan Level Alcohol dan Volatile Acidity dengan Kualitas suatu Wine')
    _show_figure(fig)
    st.markdown('---')

    # --- Citric acid vs quality, coloured by wine type ---------------------
    st.write('## Citric Acid')
    fig, ax = plt.subplots()
    sns.scatterplot(x='quality', y='citric_acid', data=data, hue='type', ax=ax)
    # The previous title was copied from an unrelated analysis; this one
    # describes the plot that is actually drawn.
    ax.set_title('Perbandingan Level Citric Acid dengan Kualitas Wine Berdasarkan Tipe')
    _show_figure(fig)
    st.markdown('---')
|
|
|
|
|
# Render the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()