narinsak unawong committed on
Commit
6df879d
·
verified ·
1 Parent(s): 45bc0e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -44
app.py CHANGED
@@ -8,65 +8,67 @@ from sklearn.neighbors import KNeighborsClassifier
8
  from sklearn.metrics import accuracy_score
9
 
10
  # Load your data (replace with your actual data loading)
11
- # Assuming penguins.csv is in the same directory as your Streamlit app
12
- try:
13
- penguins = pd.read_csv('penguins_lter.csv')
14
- except FileNotFoundError:
15
- st.error("Error: penguins_lter.csv not found. Please make sure the file is in the same directory as the app.")
16
- st.stop()
17
 
18
- # Preprocessing steps (same as your original code)
19
- penguins = penguins.dropna()
20
- penguins.drop_duplicates(inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
  # Streamlit app
24
- st.title('Penguin Species Prediction')
25
 
26
- # Sidebar for user input
27
- st.sidebar.header('Input Features')
 
28
 
29
- island = st.sidebar.selectbox('Island', penguins['Island'].unique())
30
- culmen_length = st.sidebar.slider('Culmen Length (mm)', float(penguins['Culmen Length (mm)'].min()), float(penguins['Culmen Length (mm)'].max()), float(penguins['Culmen Length (mm)'].mean()))
31
- culmen_depth = st.sidebar.slider('Culmen Depth (mm)', float(penguins['Culmen Depth (mm)'].min()), float(penguins['Culmen Depth (mm)'].max()), float(penguins['Culmen Depth (mm)'].mean()))
32
- flipper_length = st.sidebar.slider('Flipper Length (mm)', float(penguins['Flipper Length (mm)'].min()), float(penguins['Flipper Length (mm)'].max()), float(penguins['Flipper Length (mm)'].mean()))
33
- body_mass = st.sidebar.slider('Body Mass (g)', float(penguins['Body Mass (g)'].min()), float(penguins['Body Mass (g)'].max()), float(penguins['Body Mass (g)'].mean()))
34
- sex = st.sidebar.selectbox('Sex', penguins['Sex'].unique())
 
 
35
 
36
- # Create input DataFrame
37
  input_data = pd.DataFrame({
38
- 'Island': [island],
39
  'Culmen Length (mm)': [culmen_length],
40
  'Culmen Depth (mm)': [culmen_depth],
41
  'Flipper Length (mm)': [flipper_length],
42
  'Body Mass (g)': [body_mass],
 
43
  'Sex': [sex]
44
  })
45
 
46
- # Prepare the model (same as before, including your pipeline)
47
- X = penguins.drop('Species', axis=1)
48
- y = penguins['Species']
49
-
50
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
51
-
52
- numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
53
- categorical_features = ['Island', 'Sex']
54
-
55
- numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
56
- categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
57
-
58
- preprocessor = ColumnTransformer(
59
- transformers=[
60
- ('num', numerical_transformer, numerical_features),
61
- ('cat', categorical_transformer, categorical_features)
62
- ])
63
 
64
- pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', KNeighborsClassifier())])
65
- pipeline.fit(X_train, y_train)
 
66
 
67
- # Make prediction
68
- prediction = pipeline.predict(input_data)
69
 
70
- # Display prediction
71
- st.subheader('Prediction')
72
- st.write(f"Predicted Penguin Species: {prediction[0]}")
 
8
  from sklearn.metrics import accuracy_score
9
 
10
  # Load your data (replace with your actual data loading)
11
+ penguins = pd.read_csv('penguins_lter.csv') # Make sure 'penguins_lter.csv' is in your app's directory or accessible
 
 
 
 
 
12
 
13
+ # Data cleaning: drop rows with any missing value, then drop duplicate rows
14
+ penguins_cleaned = penguins.dropna()
15
+ penguins_cleaned = penguins_cleaned.drop_duplicates()
16
+
17
+ # Numerical and Categorical Features (same as original code)
18
+ numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
19
+ categorical_features = ['Island', 'Sex']
20
+
21
+ # Preprocessing pipeline (same as original code)
22
+ numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
23
+ categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
24
+ preprocessor = ColumnTransformer(transformers=[
25
+ ('num', numerical_transformer, numerical_features),
26
+ ('cat', categorical_transformer, categorical_features)
27
+ ])
28
+
29
+ # Machine Learning pipeline (same as original code)
30
+ pipeline = Pipeline(steps=[
31
+ ('preprocessor', preprocessor),
32
+ ('classifier', KNeighborsClassifier())
33
+ ])
34
 
35
 
36
  # Streamlit app
37
+ st.title("Penguin Species Classification")
38
 
39
+ # Display the dataset (optional)
40
+ if st.checkbox("Show Dataset"):
41
+ st.write(penguins_cleaned)
42
 
43
+ # User input features
44
+ st.header("Enter Penguin Features:")
45
+ culmen_length = st.number_input("Culmen Length (mm)", min_value=0.0)
46
+ culmen_depth = st.number_input("Culmen Depth (mm)", min_value=0.0)
47
+ flipper_length = st.number_input("Flipper Length (mm)", min_value=0.0)
48
+ body_mass = st.number_input("Body Mass (g)", min_value=0.0)
49
+ island = st.selectbox("Island", penguins_cleaned['Island'].unique())
50
+ sex = st.selectbox("Sex", penguins_cleaned['Sex'].unique())
51
 
52
+ # Create a dataframe for the input
53
  input_data = pd.DataFrame({
 
54
  'Culmen Length (mm)': [culmen_length],
55
  'Culmen Depth (mm)': [culmen_depth],
56
  'Flipper Length (mm)': [flipper_length],
57
  'Body Mass (g)': [body_mass],
58
+ 'Island': [island],
59
  'Sex': [sex]
60
  })
61
 
62
+ # Make Prediction
63
+ if st.button('Predict'):
64
+ # 'Species' is the target column; the remaining columns form the feature frame X
65
+ X = penguins_cleaned.drop('Species', axis=1)
66
+ y = penguins_cleaned['Species']
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
+ # Fit the model
69
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
70
+ pipeline.fit(X_train, y_train)
71
 
72
+ prediction = pipeline.predict(input_data)
 
73
 
74
+ st.write(f"Predicted Species: {prediction[0]}")