narinsak commited on
Commit
64afd26
1 Parent(s): ae96213

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -53
app.py CHANGED
@@ -2,66 +2,72 @@ import streamlit as st
2
  import pandas as pd
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.pipeline import Pipeline
5
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
6
- from sklearn.compose import ColumnTransformer
7
  from sklearn.neighbors import KNeighborsClassifier
8
- from sklearn.metrics import accuracy_score
9
 
10
# 1. Load Data
# Observations come from 'penguins_lter.csv' in the working directory.
# Rows with missing values and duplicated rows are discarded so the
# classifier trains on complete, unique observations only.
penguins = pd.read_csv('penguins_lter.csv').dropna().drop_duplicates()

# 2. Define Features and Target
# Everything except the species label is kept as a candidate feature;
# the preprocessing ColumnTransformer selects the subset it actually uses.
y = penguins['Species']
X = penguins.drop('Species', axis=1)

# 3. Split Data — 80/20 train/test split, seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
22
 
23
# 4. Create Preprocessing Pipeline
# Columns used for prediction, by kind.
NUM_COLS = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
CAT_COLS = ['Island', 'Sex']

# Scale the numeric measurements; one-hot encode the categoricals,
# ignoring categories unseen during training instead of raising.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[('scaler', StandardScaler())]), NUM_COLS),
        ('cat', Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]), CAT_COLS),
    ])

# 5. Create and Train Model Pipeline
# Preprocessing feeds a k-nearest-neighbours classifier (library defaults).
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', KNeighborsClassifier())])
pipeline.fit(X_train, y_train)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
# 6. Streamlit App
st.title('Penguin Species Prediction')

# 6.1 Sidebar for User Input
st.sidebar.header('Input Features')

def _range_slider(column):
    """Sidebar slider labelled *column*, spanning its observed min/max."""
    series = penguins[column]
    return st.sidebar.slider(column, float(series.min()), float(series.max()))

# Widget creation order matters in Streamlit; keep island first, sex last.
island = st.sidebar.selectbox('Island', penguins['Island'].unique())
culmen_length = _range_slider('Culmen Length (mm)')
culmen_depth = _range_slider('Culmen Depth (mm)')
flipper_length = _range_slider('Flipper Length (mm)')
body_mass = _range_slider('Body Mass (g)')
sex = st.sidebar.selectbox('Sex', penguins['Sex'].unique())

# 6.2 Create Input Dataframe — one row, keyed by the training column names.
input_data = pd.DataFrame({
    'Island': [island],
    'Culmen Length (mm)': [culmen_length],
    'Culmen Depth (mm)': [culmen_depth],
    'Flipper Length (mm)': [flipper_length],
    'Body Mass (g)': [body_mass],
    'Sex': [sex]
})

# 6.3 Make Prediction
prediction = pipeline.predict(input_data)

# 6.4 Display Prediction
st.subheader('Prediction')
st.write(f"Predicted Penguin Species: {prediction[0]}")
 
2
  import pandas as pd
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.pipeline import Pipeline
5
+ from sklearn.preprocessing import StandardScaler
 
6
  from sklearn.neighbors import KNeighborsClassifier
7
+ from sklearn.metrics import classification_report
8
 
9
# Load your data (replace with your actual file path)
df = pd.read_csv('penguins_lter.csv')

# Data preprocessing: impute missing values.
# FIX: the original used `df[col].fillna(..., inplace=True)`, which is a
# chained assignment — under pandas copy-on-write (the default in 3.0,
# warned since 2.x) it fills a temporary Series and leaves `df` untouched.
# Assigning the filled column back is the supported form.
numeric_cols = df.select_dtypes(include=['number']).columns
for col in numeric_cols:
    df[col] = df[col].fillna(df[col].mean())  # numeric gaps -> column mean

categorical_cols = df.select_dtypes(exclude=['number']).columns
for col in categorical_cols:
    mode = df[col].mode()
    # An all-NaN column has an empty mode; indexing [0] would raise, so
    # leave such a column as-is instead of crashing.
    if not mode.empty:
        df[col] = df[col].fillna(mode[0])  # categorical gaps -> most frequent

# Feature Engineering and Model Training
X = df.drop('Species', axis=1)          # features: everything but the label
y = df['Species']                       # target: penguin species
X = pd.get_dummies(X, drop_first=True)  # one-hot encode categoricals
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features, then classify with 5 nearest neighbours.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=5))
])
pipeline.fit(X_train, y_train)

# Held-out evaluation, kept as a dict for display in the app.
y_pred = pipeline.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
34
 
35
# Streamlit app
st.title("Penguin Species Classification")

st.write("This app predicts the species of a penguin based on its features.")

# Display the classification report computed on the held-out test set.
st.subheader("Classification Report")
st.write(pd.DataFrame(report).transpose())

# Sidebar inputs.
# FIX: inputs must be keyed by the dataset's real column names
# ('Culmen Length (mm)', 'Culmen Depth (mm)', ...). The original used
# 'bill_length_mm' / 'bill_depth_mm', which never appear in X_train's
# columns, so the alignment step below zeroed them out and the prediction
# ignored the user's input entirely.
st.sidebar.header("Penguin Features")

culmen_length = st.sidebar.number_input("Culmen Length (mm)", min_value=0.0, value=40.0)
culmen_depth = st.sidebar.number_input("Culmen Depth (mm)", min_value=0.0, value=15.0)
flipper_length = st.sidebar.number_input("Flipper Length (mm)", min_value=0.0, value=200.0)
body_mass = st.sidebar.number_input("Body Mass (g)", min_value=0.0, value=4000.0)
island = st.sidebar.selectbox("Island", df['Island'].unique())
sex = st.sidebar.selectbox("Sex", df['Sex'].unique())

# Dictionary of user inputs, keyed by the raw (pre-dummies) column names.
user_input_dict = {
    'Culmen Length (mm)': culmen_length,
    'Culmen Depth (mm)': culmen_depth,
    'Flipper Length (mm)': flipper_length,
    'Body Mass (g)': body_mass,
    'Island': island,
    'Sex': sex,
}

# One-row frame, one-hot encoded the same way as the training data.
user_input_df = pd.DataFrame([user_input_dict])
user_input_df = pd.get_dummies(user_input_df, drop_first=True)

if st.sidebar.button("Predict"):
    # Align with the training design matrix: add any dummy columns the
    # single input row lacks (as 0) and order columns identically.
    # NOTE(review): any other CSV columns present at training time still
    # default to 0 here — consistent with the original template's
    # "add more features" placeholder; extend the sidebar to cover them.
    user_input_df = user_input_df.reindex(columns=X_train.columns, fill_value=0)

    prediction = pipeline.predict(user_input_df)
    st.write(f"Predicted Species: {prediction[0]}")