narinsak unawong committed on
Commit
516b00f
·
verified ·
1 Parent(s): c221e75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -48
app.py CHANGED
@@ -8,22 +8,45 @@ from sklearn.neighbors import KNeighborsClassifier
8
  from sklearn.metrics import accuracy_score
9
 
10
  # Load your data (replace with your actual data loading)
11
- penguins = pd.read_csv('penguins_lter.csv')
 
 
 
 
 
12
 
13
- # Data Cleaning (same as your existing code)
14
- penguins_cleaned = penguins.dropna()
15
- penguins_cleaned = penguins_cleaned.drop_duplicates()
16
 
17
- # Fill missing values (same as your existing code)
18
- numerical_cols = penguins.select_dtypes(include=['number']).columns
19
- penguins[numerical_cols] = penguins[numerical_cols].fillna(penguins[numerical_cols].mean())
20
- categorical_cols = penguins.select_dtypes(include=['object']).columns
21
- penguins[categorical_cols] = penguins[categorical_cols].fillna(penguins[categorical_cols].mode().iloc[0])
22
 
 
 
23
 
24
- # Feature Engineering and Model Training (same as your existing code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  X = penguins.drop('Species', axis=1)
26
  y = penguins['Species']
 
27
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
28
 
29
  numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
@@ -38,44 +61,12 @@ preprocessor = ColumnTransformer(
38
  ('cat', categorical_transformer, categorical_features)
39
  ])
40
 
41
- pipeline = Pipeline(steps=[
42
- ('preprocessor', preprocessor),
43
- ('classifier', KNeighborsClassifier())
44
- ])
45
-
46
  pipeline.fit(X_train, y_train)
47
- y_pred = pipeline.predict(X_test)
48
- accuracy = accuracy_score(y_test, y_pred)
49
-
50
- # Streamlit App
51
- st.title("Penguin Species Classification")
52
-
53
- st.write("This app predicts the species of a penguin based on its features.")
54
-
55
- # Display the accuracy
56
- st.write(f"Model Accuracy: {accuracy}")
57
-
58
- # Input features for prediction
59
- culmen_length = st.number_input("Culmen Length (mm)", min_value=0.0)
60
- culmen_depth = st.number_input("Culmen Depth (mm)", min_value=0.0)
61
- flipper_length = st.number_input("Flipper Length (mm)", min_value=0.0)
62
- body_mass = st.number_input("Body Mass (g)", min_value=0.0)
63
- island = st.selectbox("Island", penguins['Island'].unique())
64
- sex = st.selectbox("Sex", penguins['Sex'].unique())
65
-
66
-
67
- # Create a DataFrame for prediction
68
- new_penguin = pd.DataFrame({
69
- 'Culmen Length (mm)': [culmen_length],
70
- 'Culmen Depth (mm)': [culmen_depth],
71
- 'Flipper Length (mm)': [flipper_length],
72
- 'Body Mass (g)': [body_mass],
73
- 'Island': [island],
74
- 'Sex': [sex]
75
- })
76
-
77
 
78
  # Make prediction
79
- if st.button("Predict Species"):
80
- prediction = pipeline.predict(new_penguin)
81
- st.write(f"Predicted Species: {prediction[0]}")
 
 
 
8
  from sklearn.metrics import accuracy_score
9
 
10
  # Load your data (replace with your actual data loading)
11
+ # Assuming penguins_lter.csv is in the same directory as your Streamlit app
12
+ try:
13
+ penguins = pd.read_csv('penguins_lter.csv')
14
+ except FileNotFoundError:
15
+ st.error("Error: penguins_lter.csv not found. Please make sure the file is in the same directory as the app.")
16
+ st.stop()
17
 
18
+ # Preprocessing steps (same as your original code)
19
+ penguins = penguins.dropna()
20
+ penguins.drop_duplicates(inplace=True)
21
 
 
 
 
 
 
22
 
23
+ # Streamlit app
24
+ st.title('Penguin Species Prediction')
25
 
26
+ # Sidebar for user input
27
+ st.sidebar.header('Input Features')
28
+
29
+ island = st.sidebar.selectbox('Island', penguins['Island'].unique())
30
+ culmen_length = st.sidebar.slider('Culmen Length (mm)', float(penguins['Culmen Length (mm)'].min()), float(penguins['Culmen Length (mm)'].max()), float(penguins['Culmen Length (mm)'].mean()))
31
+ culmen_depth = st.sidebar.slider('Culmen Depth (mm)', float(penguins['Culmen Depth (mm)'].min()), float(penguins['Culmen Depth (mm)'].max()), float(penguins['Culmen Depth (mm)'].mean()))
32
+ flipper_length = st.sidebar.slider('Flipper Length (mm)', float(penguins['Flipper Length (mm)'].min()), float(penguins['Flipper Length (mm)'].max()), float(penguins['Flipper Length (mm)'].mean()))
33
+ body_mass = st.sidebar.slider('Body Mass (g)', float(penguins['Body Mass (g)'].min()), float(penguins['Body Mass (g)'].max()), float(penguins['Body Mass (g)'].mean()))
34
+ sex = st.sidebar.selectbox('Sex', penguins['Sex'].unique())
35
+
36
+ # Create input DataFrame
37
+ input_data = pd.DataFrame({
38
+ 'Island': [island],
39
+ 'Culmen Length (mm)': [culmen_length],
40
+ 'Culmen Depth (mm)': [culmen_depth],
41
+ 'Flipper Length (mm)': [flipper_length],
42
+ 'Body Mass (g)': [body_mass],
43
+ 'Sex': [sex]
44
+ })
45
+
46
+ # Prepare the model (same as before, including your pipeline)
47
  X = penguins.drop('Species', axis=1)
48
  y = penguins['Species']
49
+
50
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
51
 
52
  numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
 
61
  ('cat', categorical_transformer, categorical_features)
62
  ])
63
 
64
+ pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', KNeighborsClassifier())])
 
 
 
 
65
  pipeline.fit(X_train, y_train)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Make prediction
68
+ prediction = pipeline.predict(input_data)
69
+
70
+ # Display prediction
71
+ st.subheader('Prediction')
72
+ st.write(f"Predicted Penguin Species: {prediction[0]}"