File size: 3,088 Bytes
0625161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import streamlit as st
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import base64
import seaborn as sns

# Markdown rendered at the top of the page: app title, one-line description
# and attribution for the dataset.
_APP_INTRO = """
# Penguin Prediction App

This app predicts the **Palmer Penguin** species!

Data obtained from the [palmerpenguins library](https://github.com/allisonhorst/palmerpenguins) in R by Allison Horst.
"""
st.write(_APP_INTRO)

# Every input control of this section lives in the sidebar.
_sidebar = st.sidebar
_sidebar.title('File Upload Features')

# CSV upload widget; the script below branches on whether this is None
# (nothing uploaded) to choose between batch and manual prediction.
uploaded_file = _sidebar.file_uploader(
    "Upload your input CSV file",
    type=["csv"],
)
# Species names, indexed by the integer class code returned by the classifier.
# NOTE(review): assumes the pickled model emits 0/1/2 in this order -- confirm
# against the training script.
penguins_species = np.array(['Adelie', 'Chinstrap', 'Gentoo'])

# Fixed vocabulary for the `sex` feature, in the sorted order LabelEncoder
# uses, so 'female' -> 0 and 'male' -> 1.
# NOTE(review): presumably matches the encoding used when the model was
# trained (LabelEncoder fitted on data containing both values) -- confirm.
SEX_CATEGORIES = ['female', 'male']

# Path of the pickled classifier, relative to the working directory.
MODEL_PATH = 'penguins_clf.pkl'


def _encode_sex(values):
    """Return integer codes for *values* with a stable female=0 / male=1 mapping.

    The encoder is fitted on the full vocabulary instead of on the input:
    fitting on the input makes the code depend on which values happen to be
    present, so a single 'male' row would be encoded as 0.

    Raises:
        ValueError: if *values* contains a label outside SEX_CATEGORIES.
    """
    encoder = LabelEncoder()
    encoder.fit(SEX_CATEGORIES)
    return encoder.transform(values)


def _load_classifier():
    """Load the pickled classifier from MODEL_PATH, closing the file afterwards."""
    # SECURITY: pickle executes arbitrary code on load; this is only acceptable
    # because the model file is a local artifact shipped with the app. Never
    # point MODEL_PATH at user-supplied data.
    with open(MODEL_PATH, 'rb') as model_file:
        return pickle.load(model_file)


def filedownload(df):
    """Return an HTML anchor that downloads *df* as ``penguins_predictions.csv``.

    The frame is serialised to CSV (without the index) and embedded in the
    link as a base64 data URI, so no server-side file is written.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # strings <-> bytes conversions
    href = f'<a href="data:file/csv;base64,{b64}" download="penguins_predictions.csv">Download CSV File</a>'
    return href


def user_input_features():
    """Collect one penguin's features from sidebar widgets.

    Returns:
        A single-row DataFrame with columns bill_length_mm, bill_depth_mm,
        flipper_length_mm, body_mass_g and sex (sex still as a string).
    """
    sex = st.sidebar.selectbox('Sex', ('male', 'female'))
    bill_length_mm = st.sidebar.slider('Bill length (mm)', 32.1, 59.6, 43.9)
    bill_depth_mm = st.sidebar.slider('Bill depth (mm)', 13.1, 21.5, 17.2)
    flipper_length_mm = st.sidebar.slider('Flipper length (mm)', 172.0, 231.0, 201.0)
    body_mass_g = st.sidebar.slider('Body mass (g)', 2700.0, 6300.0, 4207.0)
    data = {
        'bill_length_mm': bill_length_mm,
        'bill_depth_mm': bill_depth_mm,
        'flipper_length_mm': flipper_length_mm,
        'body_mass_g': body_mass_g,
        'sex': sex,
    }
    return pd.DataFrame(data, index=[0])


if uploaded_file is not None:
    # Batch mode: predict a species for every row of the uploaded CSV.
    df = pd.read_csv(uploaded_file)
    st.dataframe(df)  # show the raw upload before `sex` is replaced by codes
    try:
        df.sex = _encode_sex(df.sex)
    except ValueError as err:
        # Unknown labels cannot be mapped to the codes the model expects;
        # report it in the page instead of predicting on a wrong encoding.
        st.error(
            f"Could not encode the 'sex' column "
            f"(expected only {SEX_CATEGORIES}): {err}"
        )
    else:
        load_clf = _load_classifier()
        prediction = load_clf.predict(df)
        prediction_proba = load_clf.predict_proba(df)

        st.subheader('Prediction')
        pp = pd.DataFrame(penguins_species[prediction], columns=["prediction"])
        st.write(pp)

        st.subheader('Prediction Probability')
        st.dataframe(prediction_proba)

        # Input features side by side with the predicted species.
        ndf = pd.concat([df, pp], axis=1)
        st.write(ndf)

        plot = sns.barplot(x="bill_length_mm", y="bill_depth_mm", data=df)
        # sns.barplot returns an Axes, but st.pyplot needs the Figure that owns
        # it. clear_figure=True stops bars piling up on the shared pyplot
        # figure across script reruns.
        st.pyplot(plot.figure, clear_figure=True)

        st.markdown(filedownload(ndf), unsafe_allow_html=True)

else:
    # Manual mode: predict for the single penguin described by the sidebar.
    st.sidebar.title("Manual Feature input")
    input_df = user_input_features()
    st.subheader('User Input features')
    st.write('Awaiting CSV file to be uploaded. Currently using example input parameters (shown below).')
    st.write(input_df)  # shown before encoding so `sex` is still readable

    # The selectbox only offers values from SEX_CATEGORIES, so this cannot
    # raise for an unseen label.
    input_df.sex = _encode_sex(input_df.sex)

    load_clf = _load_classifier()
    prediction = load_clf.predict(input_df)
    prediction_proba = load_clf.predict_proba(input_df)

    st.subheader('Prediction')
    st.write(penguins_species[prediction])
    st.subheader('Prediction Probability')
    st.write(prediction_proba)