import streamlit as st
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Page-level configuration; executed at import time, before any other
# Streamlit call made by run().
st.set_page_config(
    page_title='Wine Quality Dataset Analysis',
    layout='wide',
    initial_sidebar_state='expanded',
)


def _show_figure(fig) -> None:
    """Render a matplotlib figure in the page, then release it.

    The figure is closed after rendering so that figures do not pile up
    in pyplot's global registry each time run() is executed again.
    """
    st.pyplot(fig)
    plt.close(fig)


def _quality_correlation(data: pd.DataFrame, wine_type: str) -> pd.Series:
    """Return the correlation of each numeric column with 'quality'.

    Only rows whose 'type' column equals ``wine_type`` are used.
    """
    subset = data[data['type'] == wine_type]
    # Keep numeric columns only: the string 'type' column cannot be
    # correlated, and pandas 2.x no longer drops such columns silently.
    numeric = subset.select_dtypes(include='number')
    return numeric.corrwith(subset['quality'])


def _show_correlation_heatmap(corr: pd.Series, title: str) -> None:
    """Draw a single-column annotated heatmap of ``corr`` with ``title``."""
    fig = plt.figure()
    # vmin/vmax pin the colour scale to the full correlation range.
    sns.heatmap(corr.to_frame(), cmap='RdYlGn', annot=True, vmin=-1, vmax=1)
    plt.title(title)
    _show_figure(fig)


def run() -> None:
    """Render the Wine Quality exploratory-data-analysis page.

    Reads 'winequalityN.csv' from the current working directory, renames
    the space-separated column names, drops duplicate rows and rows with
    missing values, and then renders the table and the charts.
    """
    # Page title
    st.title('EDA Dataset Wine Quality')

    # Description
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')
    st.markdown('---')
    st.write('Dataset : Wine Quality Data Set')
    st.write('Objective : Dapat memprediksi grade quality sebuah Wine')
    st.write('Evaluasi nya menggunakan Accuracy Score')
    st.markdown('---')

    st.write('## Dataset')
    data = pd.read_csv('winequalityN.csv')

    # Rename columns so they are easier to work with (no spaces).
    data = data.rename(columns={
        'fixed acidity': 'fixed_acidity',
        'volatile acidity': 'volatile_acidity',
        'citric acid': 'citric_acid',
        'residual sugar': 'residual_sugar',
        'free sulfur dioxide': 'free_sulfur_dioxide',
        'total sulfur dioxide': 'total_sulfur_dioxide',
    })
    data.drop_duplicates(inplace=True)
    data.dropna(inplace=True, axis=0)
    st.dataframe(data)
    st.markdown('---')

    st.write('## Checking Balance / Imbalance')
    quality = data['quality'].value_counts()
    fig = plt.figure()
    # Series.plot draws onto the current axes, i.e. the figure just created.
    quality.plot(kind='bar', color='blue')
    plt.title('Perbandingan Jumlah Quality Wine')
    plt.xlabel('Quality')
    plt.ylabel('Count')
    _show_figure(fig)
    st.markdown('---')

    st.write('## Each Features Correlation with Wine Quality')
    _show_correlation_heatmap(_quality_correlation(data, 'red'), 'Red Wine')
    _show_correlation_heatmap(
        _quality_correlation(data, 'white'), 'White Wine'
    )
    st.write(
        'Semakin tinggi level sulphates dan citric acid dalam red wine '
        'akan menghasilkan red wine yang semakin bagus'
    )
    st.write(
        'Sebaliknya, sulphates dan citric acid tidak terlalu berpengaruh '
        'pada white wine, tetapi level density yang semakin rendah akan '
        'menghasilkan white wine yang baik'
    )
    st.markdown('---')

    st.write('## Alcohol and Volatile Acidity')
    fig = plt.figure()
    sns.scatterplot(
        x='alcohol',
        y='volatile_acidity',
        data=data,
        hue='quality',
        palette='coolwarm',
    )
    plt.title(
        'Perbandingan Level Alcohol dan Volatile Acidity dengan Kualitas '
        'suatu Wine'
    )
    _show_figure(fig)
    st.markdown('---')

    st.write('## Citric Acid')
    fig = plt.figure()
    sns.scatterplot(x='quality', y='citric_acid', data=data, hue='type')
    # The previous title described an unrelated dataset; this one matches
    # the axes actually plotted (citric acid level against wine quality).
    plt.title('Perbandingan Level Citric Acid dengan Kualitas suatu Wine')
    _show_figure(fig)
    st.markdown('---')


if __name__ == '__main__':
    run()