Spaces:

Xiangliyao
/

classification-test

Sleeping

App Files Files Community

narinsak unawong committed on Nov 10, 2024

Commit

64afd26

verified ·

1 Parent(s): ae96213

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -53

app.py CHANGED Viewed

@@ -2,66 +2,72 @@ import streamlit as st
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import StandardScaler, OneHotEncoder
-from sklearn.compose import ColumnTransformer
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn.metrics import accuracy_score
-# 1. Load Data
-# Assuming your data is in a file called 'penguins_lter.csv'
-penguins = pd.read_csv('penguins_lter.csv')
-penguins = penguins.dropna()  # Handle missing values
-penguins.drop_duplicates(inplace=True)  # Remove duplicates
-# 2. Define Features and Target
-X = penguins.drop('Species', axis=1)
-y = penguins['Species']
-# 3. Split Data
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-# 4. Create Preprocessing Pipeline
-numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
-categorical_features = ['Island', 'Sex']
-numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
-categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
-preprocessor = ColumnTransformer(
-    transformers=[
-        ('num', numerical_transformer, numerical_features),
-        ('cat', categorical_transformer, categorical_features)
-    ])
-# 5. Create and Train Model Pipeline
-pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', KNeighborsClassifier())])
-pipeline.fit(X_train, y_train)
-# 6. Streamlit App
-st.title('Penguin Species Prediction')
-# 6.1 Sidebar for User Input
-st.sidebar.header('Input Features')
-island = st.sidebar.selectbox('Island', penguins['Island'].unique())
-culmen_length = st.sidebar.slider('Culmen Length (mm)', float(penguins['Culmen Length (mm)'].min()), float(penguins['Culmen Length (mm)'].max()))
-culmen_depth = st.sidebar.slider('Culmen Depth (mm)', float(penguins['Culmen Depth (mm)'].min()), float(penguins['Culmen Depth (mm)'].max()))
-flipper_length = st.sidebar.slider('Flipper Length (mm)', float(penguins['Flipper Length (mm)'].min()), float(penguins['Flipper Length (mm)'].max()))
-body_mass = st.sidebar.slider('Body Mass (g)', float(penguins['Body Mass (g)'].min()), float(penguins['Body Mass (g)'].max()))
-sex = st.sidebar.selectbox('Sex', penguins['Sex'].unique())
-# 6.2 Create Input Dataframe
-input_data = pd.DataFrame({
-    'Island': [island],
-    'Culmen Length (mm)': [culmen_length],
-    'Culmen Depth (mm)': [culmen_depth],
-    'Flipper Length (mm)': [flipper_length],
-    'Body Mass (g)': [body_mass],
-    'Sex': [sex]
-})
-# 6.3 Make Prediction
-prediction = pipeline.predict(input_data)
-# 6.4 Display Prediction
-st.subheader('Prediction')
-st.write(f"Predicted Penguin Species: {prediction[0]}")

 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
 from sklearn.neighbors import KNeighborsClassifier
+from sklearn.metrics import classification_report
+# Load your data (replace with your actual file path)
+df = pd.read_csv('penguins_lter.csv')
+# Data preprocessing (same as in your previous code)
+numeric_cols = df.select_dtypes(include=['number']).columns
+for col in numeric_cols:
+    df[col].fillna(df[col].mean(), inplace=True)
+categorical_cols = df.select_dtypes(exclude=['number']).columns
+for col in categorical_cols:
+    df[col].fillna(df[col].mode()[0], inplace=True)
+# Feature Engineering and Model Training (same as in your previous code)
+X = df.drop('Species', axis=1)
+y = df['Species']
+X = pd.get_dummies(X, drop_first=True)
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+pipeline = Pipeline([
+    ('scaler', StandardScaler()),
+    ('knn', KNeighborsClassifier(n_neighbors=5))
+])
+pipeline.fit(X_train, y_train)
+y_pred = pipeline.predict(X_test)
+report = classification_report(y_test, y_pred, output_dict=True)
+# Streamlit app
+st.title("Penguin Species Classification")
+st.write("This app predicts the species of a penguin based on its features.")
+# Display the classification report
+st.subheader("Classification Report")
+st.write(pd.DataFrame(report).transpose())
+# Add input fields for user input (example)
+st.sidebar.header("Penguin Features")
+# Example input fields (replace with your actual features)
+bill_length_mm = st.sidebar.number_input("Bill Length (mm)", min_value=0.0, value=40.0)
+bill_depth_mm = st.sidebar.number_input("Bill Depth (mm)", min_value=0.0, value=15.0)
+# ... Add more input fields for other features ...
+#Create a dictionary to store the user inputs
+user_input_dict = {
+    'bill_length_mm': bill_length_mm,
+    'bill_depth_mm': bill_depth_mm,
+    # ... Add other features here
+}
+# Create a dataframe for prediction
+user_input_df = pd.DataFrame([user_input_dict])
+user_input_df = pd.get_dummies(user_input_df, drop_first=True) # Apply the same one-hot encoding
+if st.sidebar.button("Predict"):
+    # Align the columns of user_input_df and X_train
+    missing_cols = set(X_train.columns) - set(user_input_df.columns)
+    for c in missing_cols:
+        user_input_df[c] = 0  # Add missing columns with value 0
+    user_input_df = user_input_df[X_train.columns] # Reorder the columns
+    prediction = pipeline.predict(user_input_df)
+    st.write(f"Predicted Species: {prediction[0]}")