narinsak unawong committed on
Commit
ae96213
·
verified ·
1 Parent(s): 6df879d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -44
app.py CHANGED
@@ -7,68 +7,61 @@ from sklearn.compose import ColumnTransformer
7
  from sklearn.neighbors import KNeighborsClassifier
8
  from sklearn.metrics import accuracy_score
9
 
10
- # Load your data (replace with your actual data loading)
11
- penguins = pd.read_csv('penguins_lter.csv') # Make sure 'penguins_lter.csv' is in your app's directory or accessible
 
 
 
12
 
13
- # Data cleaning and preprocessing (same as your original code)
14
- penguins_cleaned = penguins.dropna()
15
- penguins_cleaned = penguins_cleaned.drop_duplicates()
16
 
17
- # Numerical and Categorical Features (same as original code)
 
 
 
18
  numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
19
  categorical_features = ['Island', 'Sex']
20
 
21
- # Preprocessing pipeline (same as original code)
22
  numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
23
  categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
24
- preprocessor = ColumnTransformer(transformers=[
25
- ('num', numerical_transformer, numerical_features),
26
- ('cat', categorical_transformer, categorical_features)
27
- ])
28
-
29
- # Machine Learning pipeline (same as original code)
30
- pipeline = Pipeline(steps=[
31
- ('preprocessor', preprocessor),
32
- ('classifier', KNeighborsClassifier())
33
- ])
34
 
 
 
 
 
 
35
 
36
- # Streamlit app
37
- st.title("Penguin Species Classification")
 
38
 
39
- # Display the dataset (optional)
40
- if st.checkbox("Show Dataset"):
41
- st.write(penguins_cleaned)
42
 
43
- # User input features
44
- st.header("Enter Penguin Features:")
45
- culmen_length = st.number_input("Culmen Length (mm)", min_value=0.0)
46
- culmen_depth = st.number_input("Culmen Depth (mm)", min_value=0.0)
47
- flipper_length = st.number_input("Flipper Length (mm)", min_value=0.0)
48
- body_mass = st.number_input("Body Mass (g)", min_value=0.0)
49
- island = st.selectbox("Island", penguins_cleaned['Island'].unique())
50
- sex = st.selectbox("Sex", penguins_cleaned['Sex'].unique())
51
 
52
- # Create a dataframe for the input
53
  input_data = pd.DataFrame({
 
54
  'Culmen Length (mm)': [culmen_length],
55
  'Culmen Depth (mm)': [culmen_depth],
56
  'Flipper Length (mm)': [flipper_length],
57
  'Body Mass (g)': [body_mass],
58
- 'Island': [island],
59
  'Sex': [sex]
60
  })
61
 
62
- # Make Prediction
63
- if st.button('Predict'):
64
- # Assuming 'Species' is your target variable (same as original code)
65
- X = penguins_cleaned.drop('Species', axis=1)
66
- y = penguins_cleaned['Species']
67
-
68
- # Fit the model
69
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
70
- pipeline.fit(X_train, y_train)
71
-
72
- prediction = pipeline.predict(input_data)
73
 
74
- st.write(f"Predicted Species: {prediction[0]}")
 
 
 
7
  from sklearn.neighbors import KNeighborsClassifier
8
  from sklearn.metrics import accuracy_score
9
 
10
+ # 1. Load Data
11
+ # Assuming your data is in a file called 'penguins_lter.csv'
12
+ penguins = pd.read_csv('penguins_lter.csv')
13
+ penguins = penguins.dropna() # Handle missing values
14
+ penguins.drop_duplicates(inplace=True) # Remove duplicates
15
 
16
+ # 2. Define Features and Target
17
+ X = penguins.drop('Species', axis=1)
18
+ y = penguins['Species']
19
 
20
+ # 3. Split Data
21
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
22
+
23
+ # 4. Create Preprocessing Pipeline
24
  numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
25
  categorical_features = ['Island', 'Sex']
26
 
 
27
  numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
28
  categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
 
 
 
 
 
 
 
 
 
 
29
 
30
+ preprocessor = ColumnTransformer(
31
+ transformers=[
32
+ ('num', numerical_transformer, numerical_features),
33
+ ('cat', categorical_transformer, categorical_features)
34
+ ])
35
 
36
+ # 5. Create and Train Model Pipeline
37
+ pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', KNeighborsClassifier())])
38
+ pipeline.fit(X_train, y_train)
39
 
40
+ # 6. Streamlit App
41
+ st.title('Penguin Species Prediction')
 
42
 
43
+ # 6.1 Sidebar for User Input
44
+ st.sidebar.header('Input Features')
45
+ island = st.sidebar.selectbox('Island', penguins['Island'].unique())
46
+ culmen_length = st.sidebar.slider('Culmen Length (mm)', float(penguins['Culmen Length (mm)'].min()), float(penguins['Culmen Length (mm)'].max()))
47
+ culmen_depth = st.sidebar.slider('Culmen Depth (mm)', float(penguins['Culmen Depth (mm)'].min()), float(penguins['Culmen Depth (mm)'].max()))
48
+ flipper_length = st.sidebar.slider('Flipper Length (mm)', float(penguins['Flipper Length (mm)'].min()), float(penguins['Flipper Length (mm)'].max()))
49
+ body_mass = st.sidebar.slider('Body Mass (g)', float(penguins['Body Mass (g)'].min()), float(penguins['Body Mass (g)'].max()))
50
+ sex = st.sidebar.selectbox('Sex', penguins['Sex'].unique())
51
 
52
+ # 6.2 Create Input Dataframe
53
  input_data = pd.DataFrame({
54
+ 'Island': [island],
55
  'Culmen Length (mm)': [culmen_length],
56
  'Culmen Depth (mm)': [culmen_depth],
57
  'Flipper Length (mm)': [flipper_length],
58
  'Body Mass (g)': [body_mass],
 
59
  'Sex': [sex]
60
  })
61
 
62
+ # 6.3 Make Prediction
63
+ prediction = pipeline.predict(input_data)
 
 
 
 
 
 
 
 
 
64
 
65
+ # 6.4 Display Prediction
66
+ st.subheader('Prediction')
67
+ st.write(f"Predicted Penguin Species: {prediction[0]}")