RJuro committed on
Commit
55f2059
·
verified ·
1 Parent(s): 3c3d5bc

Upload 6 files

Browse files
README.md CHANGED
@@ -1,19 +1,16 @@
1
  ---
2
- title: Penguins Predictor Inclass
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
  pinned: false
11
- short_description: Streamlit template space
12
  ---
13
 
14
- # Welcome to Streamlit!
15
 
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
 
17
 
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
1
  ---
2
+ title: Palmer Penguin Species Predictor
3
+ emoji: 🐧
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: streamlit
7
+ app_file: penguin_streamlit_app.py
 
 
8
  pinned: false
 
9
  ---
10
 
11
+ # Palmer Penguin Species Predictor
12
 
13
+ This Streamlit application predicts the species of Palmer Penguins (Adelie, Chinstrap, or Gentoo)
14
+ based on their culmen length, culmen depth, flipper length, and body mass.
15
 
16
+ The model was trained on the Palmer Penguins dataset.
 
label_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2de28ef9c07a06b12496cf1c58ced82f158c1d246fe8cf755ee73d48cd9b8cae
3
+ size 561
logistic_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6e071b1e6a70faf8f77ca05493b7505d619f1a7974267ecb848e5f0e063d49d
3
+ size 1007
penguin_streamlit_app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import os
6
+ import requests
7
+
8
+ # Ensure these classes are available for joblib to unpickle
9
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
10
+ from sklearn.linear_model import LogisticRegression
11
+
12
# --- Configuration ---
# Paths to the serialized training artifacts. They are relative, so they are
# resolved against the working directory the app is launched from.
MODEL_PATH = 'logistic_model.joblib'    # classifier used for predict / predict_proba
ENCODER_PATH = 'label_encoder.joblib'   # maps encoded predictions back to species names
SCALER_PATH = 'scaler.joblib'           # transform applied to the inputs before prediction
17
+
18
# --- Load Artifacts ---
@st.cache_resource  # Cache the loaded objects so reruns do not hit the disk again
def _load_artifacts_cached():
    """Load the model, label encoder and scaler from disk.

    Any exception raised by ``joblib.load`` propagates to the caller. Keeping
    the error handling out of this cached function means only a successful
    load is stored in the cache; a failure is retried on the next rerun
    instead of being remembered as ``(None, None, None)``.

    Returns:
        A ``(model, label_encoder, scaler)`` tuple.
    """
    model = joblib.load(MODEL_PATH)
    label_encoder = joblib.load(ENCODER_PATH)
    scaler = joblib.load(SCALER_PATH)
    return model, label_encoder, scaler


def load_artifacts():
    """Return the prediction artifacts, reporting load failures in the UI.

    Returns:
        ``(model, label_encoder, scaler)`` on success, or
        ``(None, None, None)`` when any artifact could not be loaded. In the
        failure case an error message has already been shown via ``st.error``.
    """
    try:
        return _load_artifacts_cached()
    except FileNotFoundError as e:
        st.error(f"Error: One or more artifact files not found. {e}")
        st.error(f"Expected files: {MODEL_PATH}, {ENCODER_PATH}, {SCALER_PATH}")
    except Exception as e:
        # Top-level UI boundary: surface anything else (e.g. an unpickling
        # failure) to the user instead of crashing the page.
        st.error(f"An error occurred while loading artifacts: {e}")
    return None, None, None
33
+
34
# --- Feature Statistics for Input Guidance ---
# These values are derived from the training data.
# Each entry drives one sidebar slider: 'min'/'max' bound it, 'mean' is its
# initial value and 'step' is its increment. The dict order is also the
# column order of the DataFrame built from the slider values.
feature_stats = {
    'culmen_length_mm': {'min': 32.1, 'max': 59.6, 'mean': 43.92, 'step': 0.1},
    'culmen_depth_mm': {'min': 13.1, 'max': 21.5, 'mean': 17.15, 'step': 0.1},
    'flipper_length_mm': {'min': 172.0, 'max': 231.0, 'mean': 200.92, 'step': 1.0},
    'body_mass_g': {'min': 2700.0, 'max': 6300.0, 'mean': 4207.06, 'step': 100.0},
}
42
+
43
# --- Penguin Species Images ---
# Species name -> photo URL. The keys are used to look up the predicted
# species, and the dict order is the left-to-right order of the three-column
# gallery.
species_image_map = {
    "Adelie": "https://upload.wikimedia.org/wikipedia/commons/e/e3/Hope_Bay-2016-Trinity_Peninsula–Adélie_penguin_%28Pygoscelis_adeliae%29_04.jpg",
    "Gentoo": "https://upload.wikimedia.org/wikipedia/commons/0/00/Brown_Bluff-2016-Tabarin_Peninsula–Gentoo_penguin_%28Pygoscelis_papua%29_03.jpg",
    "Chinstrap": "https://upload.wikimedia.org/wikipedia/commons/0/08/South_Shetland-2016-Deception_Island–Chinstrap_penguin_%28Pygoscelis_antarctica%29_04.jpg",
}
49
+
50
# --- App UI ---
# Page-level settings (browser tab title/icon, wide layout), followed by the
# page heading and a short introduction.
st.set_page_config(
    page_title="Palmer Penguin Predictor",
    page_icon="🐧",
    layout="wide",
)

st.title("🐧 Palmer Penguin Species Predictor")
st.markdown("""
This app predicts the species of a Palmer Penguin based on its physical measurements.
Enter the measurements in the sidebar and click 'Predict' to see the results!
""")
62
+
63
# --- Main page flow ---
# 1. Load the model and preprocessors.
# 2. Collect the four measurements from sidebar sliders.
# 3. On "Predict": scale the inputs, classify, and show the species, the
#    class probabilities and a photo of the predicted species.
# 4. Always show the two explanatory expanders and the footer.
# If the artifacts could not be loaded, only an error/info pair is shown.

# Load the model and preprocessors
model, label_encoder, scaler = load_artifacts()

if model is not None and label_encoder is not None and scaler is not None:
    # Move input controls to sidebar
    st.sidebar.header("Input Penguin Measurements")

    # Create one slider per feature; keys stay the raw feature names so the
    # resulting DataFrame columns follow feature_stats.
    inputs = {}
    for feature, stats in feature_stats.items():
        # Feature names end in their unit ("..._mm" / "..._g"). Split the
        # unit off so the label reads "Culmen Length (mm)" instead of
        # repeating it as "Culmen Length Mm (mm)".
        name_part, _, unit = feature.rpartition('_')
        label = name_part.replace('_', ' ').title()

        inputs[feature] = st.sidebar.slider(
            f"{label} ({unit})",
            min_value=float(stats['min']),
            max_value=float(stats['max']),
            value=float(stats['mean']),
            step=float(stats['step']),
            help=f"Typical range: {stats['min']} - {stats['max']} (Average: {stats['mean']})",
        )

    # Create a button to trigger prediction in the sidebar
    predict_button = st.sidebar.button("🔍 Predict Penguin Species", type="primary")

    # Main content area
    if predict_button:
        # Single-row DataFrame; column order follows feature_stats
        input_df = pd.DataFrame([inputs])

        # Display the input values
        st.subheader("Your Input Values:")
        st.dataframe(input_df.style.format("{:.1f}"))

        # Scale the input features
        input_scaled = scaler.transform(input_df)

        # Make prediction
        prediction_encoded = model.predict(input_scaled)
        prediction_proba = model.predict_proba(input_scaled)

        # Decode the prediction
        predicted_species = label_encoder.inverse_transform(prediction_encoded)[0]

        # Display the prediction result
        st.subheader("Prediction Result:")
        st.markdown(f"### This penguin is a **{predicted_species}**!")

        # Display the probabilities, one column per encoder class
        st.subheader("Prediction Probabilities:")
        proba_df = pd.DataFrame(
            prediction_proba,
            columns=label_encoder.classes_,
        )
        st.dataframe(proba_df.style.format("{:.2%}"))

        # Display the penguin image using streamlit's image component directly
        st.subheader(f"{predicted_species} Penguin:")
        st.image(species_image_map[predicted_species], width=400, caption=f"{predicted_species} Penguin")

    # Add information about the features
    with st.expander("About the Measurements"):
        st.markdown("""
        ### Penguin Measurements Explained

        - **Culmen Length**: The length of the penguin's bill (in mm)
        - **Culmen Depth**: The depth (height) of the penguin's bill (in mm)
        - **Flipper Length**: The length of the penguin's flipper (in mm)
        - **Body Mass**: The weight of the penguin (in grams)

        These measurements are used by researchers to study penguin populations and can also help identify different species.
        """)

        # Show a table of the feature statistics (one row per feature)
        st.subheader("Feature Statistics from Training Data:")
        stats_df = pd.DataFrame(feature_stats).T
        st.dataframe(stats_df.style.format("{:.1f}"))

    # Add information about the penguin species
    with st.expander("About the Penguin Species"):
        st.markdown("""
        ### Palmer Penguin Species

        The Palmer Archipelago in Antarctica is home to three penguin species:

        - **Adelie**: Smaller penguins with a white ring around the eye
        - **Chinstrap**: Named for the narrow black band under their head
        - **Gentoo**: Larger penguins with bright orange-red bills and feet

        This model was trained on the Palmer Penguins dataset, which contains measurements of these three species.
        """)

        # Display all three penguin species images using streamlit's image component
        species_cols = st.columns(3)
        for column, (species, url) in zip(species_cols, species_image_map.items()):
            with column:
                st.markdown(f"**{species}**")
                st.image(url, width=200, caption=species)

    # Footer
    st.markdown("---")
    st.markdown("Created with Streamlit • Data from [Palmer Penguins Dataset](https://github.com/allisonhorst/palmerpenguins)")

else:
    st.error("Could not load the model or preprocessors. Please check that the model files exist in the correct location.")
    st.info("Make sure you've run the training script first to generate the model files.")
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
  pandas
3
- streamlit
 
 
 
 
1
+ streamlit
2
  pandas
3
+ numpy
4
+ scikit-learn
5
+ requests
6
+ joblib
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b68dfae5697d6847f2b92c0e0e1a59193e6193a1072dad34dee6a3a63ff84d6d
3
+ size 1095