narinsak committed on
Commit
cec45bd
1 Parent(s): d8bc07d

Upload app-penguins.py

Browse files
Files changed (1) hide show
  1. app-penguins.py +81 -0
app-penguins.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: train a k-nearest-neighbours classifier on the penguin CSV
and predict a species from measurements entered by the user."""

import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Location of the training data, relative to the working directory.
DATA_PATH = 'penguins_lter.csv'

# Columns fed to the model; every other column in the CSV is ignored.
NUMERICAL_FEATURES = [
    'Culmen Length (mm)',
    'Culmen Depth (mm)',
    'Flipper Length (mm)',
    'Body Mass (g)',
]
CATEGORICAL_FEATURES = ['Island', 'Sex']


def load_penguins(path=DATA_PATH):
    """Read the penguin CSV and impute its missing values.

    Numeric columns are filled with their column mean and object (string)
    columns with their most frequent value, so no rows are discarded.

    Args:
        path: Path of the CSV file to read.

    Returns:
        The imputed DataFrame.
    """
    penguins = pd.read_csv(path)

    numerical_cols = penguins.select_dtypes(include=['number']).columns
    penguins[numerical_cols] = penguins[numerical_cols].fillna(
        penguins[numerical_cols].mean()
    )

    categorical_cols = penguins.select_dtypes(include=['object']).columns
    # mode() returns one row per tied value; row 0 is the first mode per column.
    penguins[categorical_cols] = penguins[categorical_cols].fillna(
        penguins[categorical_cols].mode().iloc[0]
    )
    return penguins


def build_pipeline():
    """Return an unfitted preprocessing + KNN classification pipeline.

    Numeric features are standardised and categorical features are one-hot
    encoded; categories unseen during fitting are encoded as all zeros.
    """
    numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
    categorical_transformer = Pipeline(
        steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
    )

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numerical_transformer, NUMERICAL_FEATURES),
            ('cat', categorical_transformer, CATEGORICAL_FEATURES),
        ]
    )

    return Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', KNeighborsClassifier()),
    ])


# Streamlit re-executes this script on every widget interaction; caching keeps
# the CSV load and model fit from being repeated on each rerun. The cached
# result is reused until the cache is cleared, so edits to the CSV on disk are
# not picked up automatically.
@st.cache_resource
def train_model(path=DATA_PATH):
    """Load the data, fit the classifier and score it on a held-out split.

    Args:
        path: Path of the CSV file to train on.

    Returns:
        A ``(penguins, pipeline, accuracy)`` tuple: the imputed DataFrame, the
        fitted pipeline, and its accuracy on the 20% test split.
    """
    penguins = load_penguins(path)

    # Select the model's columns explicitly instead of passing every
    # non-target column and relying on ColumnTransformer to drop the rest.
    X = penguins[NUMERICAL_FEATURES + CATEGORICAL_FEATURES]
    y = penguins['Species']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    pipeline = build_pipeline()
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return penguins, pipeline, accuracy


penguins, pipeline, accuracy = train_model()

# Streamlit App
st.title("Penguin Species Classification")

st.write("This app predicts the species of a penguin based on its features.")

# Display the accuracy
st.write(f"Model Accuracy: {accuracy}")

# Input features for prediction
culmen_length = st.number_input("Culmen Length (mm)", min_value=0.0)
culmen_depth = st.number_input("Culmen Depth (mm)", min_value=0.0)
flipper_length = st.number_input("Flipper Length (mm)", min_value=0.0)
body_mass = st.number_input("Body Mass (g)", min_value=0.0)
# Offer only the category values present in the (imputed) training data.
island = st.selectbox("Island", penguins['Island'].unique())
sex = st.selectbox("Sex", penguins['Sex'].unique())

# Single-row frame whose column names match those the pipeline was fitted on.
new_penguin = pd.DataFrame({
    'Culmen Length (mm)': [culmen_length],
    'Culmen Depth (mm)': [culmen_depth],
    'Flipper Length (mm)': [flipper_length],
    'Body Mass (g)': [body_mass],
    'Island': [island],
    'Sex': [sex],
})

# Make prediction
if st.button("Predict Species"):
    prediction = pipeline.predict(new_penguin)
    st.write(f"Predicted Species: {prediction[0]}")