louiecerv committed on
Commit
2b3c03c
·
1 Parent(s): fa94ea3
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. src/streamlit_app.py +25 -1
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  altair
2
  pandas
3
- streamlit
 
 
1
  altair
2
  pandas
3
+ streamlit
4
+ scikit-learn
src/streamlit_app.py CHANGED
@@ -1,5 +1,9 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
 
 
3
 
4
  penguin_df = pd.read_csv('src/penguins.csv')
5
  st.write(penguin_df.head())
@@ -16,4 +20,24 @@ features = pd.get_dummies(features)
16
  st.write('Here are our output variables')
17
  st.write(output.head())
18
  st.write('Here are our feature variables')
19
- st.write(features.head())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+
5
+ from sklearn.metrics import accuracy_score
6
+ from sklearn.ensemble import RandomForestClassifier
7
 
8
  penguin_df = pd.read_csv('src/penguins.csv')
9
  st.write(penguin_df.head())
 
20
  st.write('Here are our output variables')
21
  st.write(output.head())
22
  st.write('Here are our feature variables')
23
+ st.write(features.head())
24
+
25
+ st.subheader('Model Training')
26
+
27
+ output = penguin_df['species']
28
+ features = penguin_df[['island', 'bill_length_mm', 'bill_depth_mm',
29
+ 'flipper_length_mm', 'body_mass_g', 'sex']]
30
+
31
+ features = pd.get_dummies(features)
32
+ output, uniques = pd.factorize(output)
33
+
34
+ x_train, x_test, y_train, y_test = train_test_split(
35
+
36
+ features, output, test_size=.8)
37
+
38
+ rfc = RandomForestClassifier(random_state=15)
39
+ rfc.fit(x_train.values, y_train)
40
+
41
+ y_pred = rfc.predict(x_test.values)
42
+ score = accuracy_score(y_pred, y_test)
43
+ st.write('Our accuracy score for this model is {}'.format(score))