|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
from sklearn.preprocessing import StandardScaler, LabelEncoder |
|
from sklearn.cluster import DBSCAN |
|
from sklearn.ensemble import RandomForestClassifier |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
|
|
def load_data(path="Animal Dataset.csv") -> pd.DataFrame:
    """Read the animal dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to ``"Animal Dataset.csv"``,
            resolved relative to the current working directory, so existing
            zero-argument calls keep working.

    Returns:
        The file contents as a DataFrame, exactly as ``pandas.read_csv``
        parses them (no cleaning is done here).
    """
    return pd.read_csv(path)
|
|
|
|
|
def convert_range_to_float(val):
    """Convert a raw cell value to a float, averaging ``"low-high"`` ranges.

    Args:
        val: A number, a numeric string such as ``"12.5"`` or ``"-5"``, or a
            range string such as ``"10-20"``.

    Returns:
        The numeric value as a float; for a range, the midpoint of its two
        bounds. ``numpy.nan`` is returned for anything that cannot be
        interpreted (free text, ``None``, malformed ranges such as
        ``"1-2-3"``).
    """
    # Try the plain conversion first: strings like "-5" or "1e-3" contain a
    # hyphen but are ordinary numbers, not ranges.
    try:
        return float(val)
    except (TypeError, ValueError, OverflowError):
        pass

    if not isinstance(val, str):
        return np.nan

    text = val.strip()
    # Start searching at index 1 so a leading minus sign on the lower bound
    # is not mistaken for the range separator.
    separator = text.find('-', 1)
    if separator == -1:
        return np.nan

    try:
        low = float(text[:separator])
        high = float(text[separator + 1:])
    except ValueError:
        return np.nan
    return (low + high) / 2
|
|
|
|
|
# --- Page header ------------------------------------------------------------
# The title emoji is written as a real Unicode character; the previous text
# was the same emoji's bytes decoded with the wrong codec.
st.title("🐾 Animal Diet Prediction and Lifespan Clustering")
st.markdown(
    "#### Check animal diet and group them according to lifespan "
    "using Random Forest Classifier and DBSCAN"
)

# One tab per learning task. The labels are plain text: the leading character
# they used to carry was unrecoverable encoding residue.
tabs = st.tabs(["Supervised Learning", "Unsupervised Learning"])

# --- Data loading and cleaning ----------------------------------------------
data = load_data()

# 'Lifespan (years)' is deliberately last: both tabs take
# numeric_features[:-1] as the model inputs and treat lifespan separately.
numeric_features = [
    'Height (cm)',
    'Weight (kg)',
    'Average Speed (km/h)',
    'Top Speed (km/h)',
    'Gestation Period (days)',
    'Offspring per Birth',
    'Lifespan (years)',
]

# Cells may hold plain numbers, "low-high" ranges or free text; normalise
# them to floats (NaN when a cell cannot be interpreted).
for col in numeric_features:
    data[col] = data[col].apply(convert_range_to_float)

# Keep only rows where every numeric feature is usable. The explicit copy
# makes `data` an independent frame, so the column assignment below does not
# trigger pandas' chained-assignment warning.
data = data.dropna(subset=numeric_features).copy()

# With no usable rows the slider bounds below would be NaN and int() would
# raise; stop with a readable message instead.
if data.empty:
    st.error("No rows with complete numeric data were found in the dataset.")
    st.stop()

# Encode the diet labels as integers for the classifier. `label_encoder` is
# kept so predictions can be mapped back to the original diet names.
if 'Diet' in data.columns:
    label_encoder = LabelEncoder()
    data['Diet_encoded'] = label_encoder.fit_transform(data['Diet'])
|
|
|
|
|
def _feature_slider(column, help_text):
    """Render an integer slider spanning the observed range of `column` in `data`."""
    # Round outwards (floor/ceil) instead of truncating so a fractional
    # maximum such as 62.5 does not shrink the selectable range.
    lower = int(np.floor(data[column].min()))
    upper = int(np.ceil(data[column].max()))
    return st.slider(column, min_value=lower, max_value=upper, help=help_text)


with tabs[0]:
    st.header("Supervised: Predict Animal Diet Type")

    if 'Diet_encoded' not in data.columns:
        # Without diet labels there is nothing to train on; show a message
        # rather than failing with a KeyError.
        st.error("The dataset has no 'Diet' column, so diet prediction is unavailable.")
    else:
        st.subheader("Adjust the animal's characteristics to predict its diet")
        height = _feature_slider("Height (cm)", "How tall is the animal?")
        weight = _feature_slider("Weight (kg)", "How much does the animal weigh?")
        speed = _feature_slider("Average Speed (km/h)", "How fast can the animal move?")
        top_speed = _feature_slider("Top Speed (km/h)", "What is the maximum speed the animal can reach?")
        gestation = _feature_slider("Gestation Period (days)", "How long is the animal's pregnancy?")
        offspring = _feature_slider("Offspring per Birth", "How many offspring does the animal give birth to at once?")

        # Every numeric feature except the trailing lifespan column.
        feature_cols = numeric_features[:-1]

        # Single-row frame for the animal described by the sliders; `columns`
        # pins the column order to the one used for training.
        input_data = pd.DataFrame(
            {
                'Height (cm)': [height],
                'Weight (kg)': [weight],
                'Average Speed (km/h)': [speed],
                'Top Speed (km/h)': [top_speed],
                'Gestation Period (days)': [gestation],
                'Offspring per Birth': [offspring],
            },
            columns=feature_cols,
        )

        X = data[feature_cols]
        y = data['Diet_encoded']

        # The script reruns (and retrains) on every widget interaction; a
        # fixed seed keeps the prediction stable for identical slider values.
        clf = RandomForestClassifier(random_state=42)
        clf.fit(X, y)
        diet_pred = clf.predict(input_data)

        # Map the integer class back to the original diet name.
        predicted_diet = label_encoder.inverse_transform(diet_pred)
        st.subheader(f"Predicted Diet: {predicted_diet[0]}")
|
|
|
|
|
with tabs[1]:
    st.header("Unsupervised: Group Animals Based on Lifespan")
    st.write(
        "In this part, we will group animals based on their lifespan. "
        "This helps to identify patterns in how different animals live and how long they survive."
    )

    st.subheader("Choose the lifespan range to group animals")

    # Round the bounds outwards so animals with a fractional lifespan (for
    # example the midpoint of a range) stay selectable at both ends.
    lifespan_lower = int(np.floor(data['Lifespan (years)'].min()))
    lifespan_upper = int(np.ceil(data['Lifespan (years)'].max()))

    lifespan_min = st.slider(
        "Lifespan Min (years)",
        min_value=lifespan_lower,
        max_value=lifespan_upper,
        value=lifespan_lower,
        help="The minimum lifespan of the animals you are interested in.",
    )
    # Default the upper slider to the maximum so the initial selection covers
    # the whole dataset instead of collapsing to [min, min].
    lifespan_max = st.slider(
        "Lifespan Max (years)",
        min_value=lifespan_lower,
        max_value=lifespan_upper,
        value=lifespan_upper,
        help="The maximum lifespan of the animals you are interested in.",
    )

    # Copy the selection so adding the 'Cluster' column below writes to an
    # independent frame rather than a slice of `data`.
    in_range = (data['Lifespan (years)'] >= lifespan_min) & (data['Lifespan (years)'] <= lifespan_max)
    filtered_data = data[in_range].copy()

    if filtered_data.empty:
        # Also reached when the minimum slider is set above the maximum.
        st.warning("No animals found with the selected lifespan range. Please adjust the sliders.")
    else:
        # Standardise the features (everything except lifespan) so no single
        # unit dominates the distance computation.
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(filtered_data[numeric_features[:-1]])

        # DBSCAN assigns the label -1 to points it considers noise.
        db = DBSCAN(eps=1.5, min_samples=5)
        clusters = db.fit_predict(X_scaled)
        filtered_data['Cluster'] = clusters

        st.subheader("Clustered Animals by Lifespan")
        st.write("Animals are grouped based on their characteristics (except lifespan). The animals in the same cluster share similarities.")

        st.subheader("Animals in the Selected Lifespan Range")
        st.dataframe(filtered_data[['Animal', 'Lifespan (years)', 'Weight (kg)', 'Cluster']])

        st.subheader("Cluster Plot (Lifespan vs Weight)")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.scatterplot(
            x=filtered_data['Lifespan (years)'],
            y=filtered_data['Weight (kg)'],
            hue=clusters,
            palette="tab10",
            ax=ax,
        )
        st.pyplot(fig)
        # Release the figure; otherwise one accumulates in pyplot's registry
        # on every rerun of the script.
        plt.close(fig)
|
|