Spaces:

Xiangliyao
/

classification-test

Sleeping

narinsak unawong

Update app.py

fbb40ed verified 8 months ago

1.87 kB

	import streamlit as st
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import StandardScaler
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.metrics import classification_report

	# Load the penguin dataset from 'penguins_lter.csv' in the working directory.
	# Every later step reads `df`, so when the file is missing we report the
	# problem in the app and stop this script run instead of continuing.
	try:
	    df = pd.read_csv('penguins_lter.csv')
	except FileNotFoundError:
	    st.error("Error: 'penguins_lter.csv' not found. Please upload the file or adjust the path.")
	    st.stop()


	# Data preprocessing: fill missing values column by column.
	# Results are assigned back to `df[col]` rather than using a chained
	# `df[col].fillna(..., inplace=True)`: the chained form mutates a temporary
	# object, which pandas warns about and which has no effect on `df` when
	# Copy-on-Write is enabled.

	# Numeric columns: replace missing values with the column mean.
	numeric_cols = df.select_dtypes(include=['number']).columns
	for col in numeric_cols:
	    df[col] = df[col].fillna(df[col].mean())

	# Non-numeric columns: replace missing values with the most frequent value.
	categorical_cols = df.select_dtypes(exclude=['number']).columns
	for col in categorical_cols:
	    modes = df[col].mode()
	    # mode() is empty when the column holds no non-missing values; there is
	    # nothing sensible to fill with, so leave such a column untouched.
	    if not modes.empty:
	        df[col] = df[col].fillna(modes.iloc[0])


	# Model training and prediction.

	# 'Species' is the target; every other column of `df` is a feature.
	y = df['Species']

	# Drop the target and one-hot encode the remaining non-numeric columns
	# (drop_first=True omits the first level of each encoded column).
	X = pd.get_dummies(df.drop(columns='Species'), drop_first=True)

	# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
	# reproducible between runs.
	X_train, X_test, y_train, y_test = train_test_split(
	    X,
	    y,
	    test_size=0.2,
	    random_state=42,
	)

	# Standardize the features, then classify with 5-nearest-neighbours.
	pipeline = Pipeline(
	    steps=[
	        ('scaler', StandardScaler()),
	        ('knn', KNeighborsClassifier(n_neighbors=5)),
	    ]
	)

	# Fit on the training split and predict the held-out rows.
	y_pred = pipeline.fit(X_train, y_train).predict(X_test)


	# Streamlit page: heading and a one-line description of the app.
	st.title("Penguin Species Classification")
	st.write(
	    "This app predicts the species of a penguin based on its physical characteristics."
	)

	# Evaluation of the held-out predictions, shown as plain text.
	st.subheader("Classification Report")
	st.text(classification_report(y_true=y_test, y_pred=y_pred))

	# Preview of the first five rows of the loaded data.
	st.dataframe(df.head(5))