Michael Rey
added all files
dd84fb0
raw
history blame contribute delete
2.31 kB
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
# ---------------------------------------------------------------------------
# Page header
# ---------------------------------------------------------------------------
st.title("🌍 Earthquake Location Clustering")
st.markdown("#### Explore how earthquakes are grouped based on their geographic locations using DBSCAN")

# ---------------------------------------------------------------------------
# Data preparation
# ---------------------------------------------------------------------------
# Only the coordinate columns are used; rows missing either value are dropped.
COORD_COLUMNS = ['Latitude', 'Longitude']
df = pd.read_csv("database.csv")[COORD_COLUMNS].dropna()

# Fit the scaler on the coordinates and keep it around: later sections reuse
# it to project user-selected points into the same standardized space.
scaler = StandardScaler().fit(df[COORD_COLUMNS])
X_scaled = scaler.transform(df[COORD_COLUMNS])

# ---------------------------------------------------------------------------
# Clustering
# ---------------------------------------------------------------------------
# DBSCAN runs on the standardized coordinates; a label of -1 marks points
# that were not assigned to any cluster (noise).
db = DBSCAN(eps=0.3, min_samples=5).fit(X_scaled)
df['Cluster'] = db.labels_
# Tabs: raw data preview, cluster scatter plot, and interactive cluster lookup.
# The labels are plain Unicode emoji; two of them were previously stored as
# mis-decoded byte sequences and rendered as garbage in the tab bar.
tab1, tab2, tab3 = st.tabs(["🌐 Raw Earthquake Data", "🗺️ Cluster Visualization", "📍 Guess Cluster"])

with tab1:
    # Show the first ten rows of coordinates with their assigned cluster label.
    st.header("🌐 Earthquake Data")
    st.dataframe(df[['Latitude', 'Longitude', 'Cluster']].head(10))
with tab2:
    # Scatter plot of every earthquake, coloured by its DBSCAN cluster label.
    st.header("🗺️ Earthquake Clusters Plot")
    st.write("Scatter plot showing how earthquakes are grouped based on their locations.")

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(df['Longitude'], df['Latitude'], c=df['Cluster'], cmap='rainbow', s=10)
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    ax.set_title('Earthquake Location Clusters')
    st.pyplot(fig)

    # Streamlit re-executes the whole script on every interaction, and pyplot
    # keeps each figure it creates alive until it is closed explicitly.
    # Closing here stops figures from piling up across reruns.
    plt.close(fig)
with tab3:
    # Interactive lookup: report which of the already-computed clusters a
    # user-selected coordinate falls into.
    st.header("📍 Which Cluster Is An Earthquake In?")
    st.write("Pick a location to see what cluster it would belong to.")

    lat_min, lat_max = float(df['Latitude'].min()), float(df['Latitude'].max())
    lon_min, lon_max = float(df['Longitude'].min()), float(df['Longitude'].max())

    # The slider's initial value must lie inside [min, max]; clamp the 0.0
    # default so data that does not straddle zero still works.
    lat = st.slider("Latitude", lat_min, lat_max, min(max(0.0, lat_min), lat_max))
    lon = st.slider("Longitude", lon_min, lon_max, min(max(0.0, lon_min), lon_max))

    # Project the chosen point into the same standardized space the clustering
    # was fitted in. A DataFrame with the training column names is used so the
    # scaler sees the same feature names it was fitted with.
    query = pd.DataFrame([[lat, lon]], columns=['Latitude', 'Longitude'])
    new_point = scaler.transform(query)[0]

    # Assign the point using the clustering that is shown in the other tabs
    # instead of re-fitting DBSCAN with different parameters: a point belongs
    # to a cluster when it lies within eps of one of that cluster's core
    # samples. This keeps the reported cluster id consistent with the table
    # and the plot, and avoids re-clustering the whole dataset on every
    # slider move.
    new_cluster = -1
    core_points = db.components_
    if len(core_points) > 0:
        distances = ((core_points - new_point) ** 2).sum(axis=1) ** 0.5
        nearest = int(distances.argmin())
        if distances[nearest] <= db.eps:
            new_cluster = int(db.labels_[db.core_sample_indices_[nearest]])

    if new_cluster == -1:
        st.warning("This point does not belong to any cluster (outlier).")
    else:
        st.success(f"This point likely belongs to **Cluster {new_cluster}**.")