Commit
·
9f47e0f
1
Parent(s):
625ccb3
Submit Model
Browse files- app.py +12 -0
- eda.py +101 -0
- model.pkl +3 -0
- prediction.py +62 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
|
6 |
+
# Sidebar selector that decides which page module renders on this script run.
navigation = st.sidebar.selectbox('Pilih Halaman', ('EDA', 'Predict'))

# Any choice other than 'EDA' is routed to the prediction page.
if navigation != 'EDA':
    prediction.run()
else:
    eda.run()
|
eda.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import seaborn as sns
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import plotly.express as px
|
7 |
+
|
8 |
+
# Page-wide Streamlit settings; this is a module-level call, so it executes
# when the module is imported rather than inside run().
st.set_page_config(
    page_title='Wine Quality Dataset Analysis',
    layout='wide',
    initial_sidebar_state='expanded',
)
|
9 |
+
|
10 |
+
def run():
    """Render the exploratory-data-analysis page for the wine quality dataset.

    Reads ``winequalityN.csv`` (path relative to the working directory),
    renames the space-separated column names to snake_case, drops duplicate
    and incomplete rows, and then shows the table and a series of charts
    through Streamlit.
    """
    # Page title
    st.title('EDA Dataset Wine Quality')

    # Description
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')
    st.markdown('---')

    st.write('Dataset : Wine Quality Data Set')
    st.write('Objective : Dapat memprediksi grade quality sebuah Wine')
    st.write('Evaluasi nya menggunakan Accuracy Score')
    st.markdown('---')

    st.write('## Dataset')
    data = pd.read_csv('winequalityN.csv')

    # Rename columns so they are easier to use
    data = data.rename(columns={
        'fixed acidity': 'fixed_acidity',
        'volatile acidity': 'volatile_acidity',
        'citric acid': 'citric_acid',
        'residual sugar': 'residual_sugar',
        'free sulfur dioxide': 'free_sulfur_dioxide',
        'total sulfur dioxide': 'total_sulfur_dioxide',
    })
    data.drop_duplicates(inplace=True)
    data.dropna(inplace=True, axis=0)

    st.dataframe(data)
    st.markdown('---')

    st.write('## Checking Balance / Imbalance')
    quality = data['quality'].value_counts()

    # Explicit figure/axes objects avoid relying on pyplot's global
    # "current figure" state.
    fig, ax = plt.subplots()
    quality.plot(kind='bar', color='blue', ax=ax)
    ax.set_title('Perbandingan Jumlah Quality Wine')
    ax.set_xlabel('Quality')
    ax.set_ylabel('Count')
    _render_figure(fig)
    st.markdown('---')

    st.write('## Each Features Correlation with Wine Quality')

    # Correlate numeric columns only: the string column 'type' cannot be
    # correlated, and recent pandas versions raise instead of silently
    # dropping it.
    numeric = data.select_dtypes(include='number')
    for wine_type, title in (('red', 'Red Wine'), ('white', 'White Wine')):
        corr = numeric[data['type'] == wine_type].corrwith(data['quality'])
        fig, ax = plt.subplots()
        sns.heatmap(corr.to_frame(), cmap='RdYlGn', annot=True,
                    vmin=-1, vmax=1, ax=ax)
        ax.set_title(title)
        _render_figure(fig)

    st.write('Semakin tinggi level sulphates dan citric acid dalam red wine akan menghasilkan red wine yang semakin bagus')
    st.write('Sebaliknya, sulphates dan citric acid tidak terlalu berpengaruh pada white wine, tetapi level density yang semakin rendah akan menghasilkan white wine yang baik')
    st.markdown('---')

    st.write('## Alcohol and Volatile Acidity')
    fig, ax = plt.subplots()
    sns.scatterplot(x='alcohol', y='volatile_acidity', data=data,
                    hue='quality', palette='coolwarm', ax=ax)
    ax.set_title('Perbandingan Level Alcohol dan Volatile Acidity dengan Kualitas suatu Wine')
    _render_figure(fig)
    st.markdown('---')

    st.write('## Citric Acid')
    fig, ax = plt.subplots()
    sns.scatterplot(x='quality', y='citric_acid', data=data, hue='type', ax=ax)
    # The previous title described an unrelated dataset; this one matches the
    # axes actually plotted (quality vs. citric_acid).
    ax.set_title('Perbandingan Level Citric Acid dengan Quality Wine')
    _render_figure(fig)

    st.markdown('---')


def _render_figure(fig):
    """Send *fig* to Streamlit, then close it so figures do not pile up."""
    st.pyplot(fig)
    plt.close(fig)


if __name__ == '__main__':
    run()
|
model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7ee0ff3fe56eb4acb018e634850e719906fe35dcec41aa3be33164160367e25
|
3 |
+
size 555794
|
prediction.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
import json
|
6 |
+
|
7 |
+
|
8 |
+
# Load All Files
|
9 |
+
|
10 |
+
# Deserialize the fitted model once, at import time.
# NOTE(review): unpickling can execute arbitrary code, so model.pkl must come
# from a trusted source. The path is relative to the working directory.
with open('model.pkl', 'rb') as model_file:
    pipeline = pickle.load(model_file)
|
12 |
+
|
13 |
+
def run():
    """Render the prediction page.

    Shows a form with one input per wine feature, displays the entered values
    as a one-row table, and, once the form is submitted, writes the output of
    the module-level ``pipeline.predict`` for that row.
    """
    st.title('Wine Quality Prediction')

    with st.form(key='form_heart_failure'):
        # Named wine_type rather than `type` so the builtin is not shadowed.
        wine_type = st.selectbox('Red/White Wine?', ('red', 'white'))
        fixed = st.number_input('Level of Fixed Acidity', min_value=3.0, max_value=20., value=5., step=.1)
        volatile = st.number_input('Level of Volatile Acidity', min_value=.01, max_value=2., value=1., step=.01)
        citric = st.number_input('Level of Citric Acid', min_value=.0, max_value=2., value=1., step=.01)
        sugar = st.number_input('Level of Residual Sugar', min_value=.1, max_value=80., value=1., step=.1)
        chlorides = st.number_input('Level of Chlorides', min_value=.001, max_value=1., value=.001, step=.001)
        free = st.number_input('Level of Free Sulfur Dioxide', min_value=1, max_value=300, value=20, step=1)
        total = st.number_input('Level of Total Sulfur Dioxide', min_value=5, max_value=450, value=100, step=1)
        density = st.number_input('Level of Density', min_value=.8, max_value=1.2, value=.9, step=.001)
        pH = st.number_input('Level of pH', min_value=2., max_value=5., value=2.5, step=.1)
        sulphates = st.number_input('Level of Sulphates', min_value=.1, max_value=3., value=1., step=.1)
        alcohol = st.number_input('Level of Alcohol', min_value=5., max_value=20., value=10., step=.1)

        submitted = st.form_submit_button('Predict')

    # Keys use the same snake_case column names that the EDA page produces.
    # NOTE(review): presumably these match the columns the pickled pipeline
    # was fitted on - confirm against the training notebook.
    features = {
        'type': wine_type,
        'fixed_acidity': fixed,
        'volatile_acidity': volatile,
        'citric_acid': citric,
        'residual_sugar': sugar,
        'chlorides': chlorides,
        'free_sulfur_dioxide': free,
        'total_sulfur_dioxide': total,
        'density': density,
        'pH': pH,
        'sulphates': sulphates,
        'alcohol': alcohol,
    }

    # Single-row frame: one inference sample.
    data_inf = pd.DataFrame([features])
    st.dataframe(data_inf)

    if submitted:
        # Predict using the model
        y_pred_inf = pipeline.predict(data_inf)
        st.write('Hasil prediksi Model : ', y_pred_inf)


if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
numpy
|
6 |
+
plotly
|
7 |
+
scikit-learn==1.2.1
|