# Source page header: file size 2,314 bytes, revision dd84fb0
# (the viewer's line-number gutter was extraction residue and has been dropped)
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
# --- Page header -------------------------------------------------------------
st.title("π Earthquake Location Clustering")
st.markdown("#### Explore how earthquakes are grouped based on their geographic locations using DBSCAN")

# --- Data --------------------------------------------------------------------
# Only the two coordinate columns are used; rows missing either one are dropped.
_COORD_COLUMNS = ['Latitude', 'Longitude']
df = pd.read_csv("database.csv")[_COORD_COLUMNS].dropna()

# --- Feature scaling ---------------------------------------------------------
# The fitted scaler is kept at module level because the lookup tab reuses it
# to transform the user-selected point.
scaler = StandardScaler()
scaler.fit(df[_COORD_COLUMNS])
X_scaled = scaler.transform(df[_COORD_COLUMNS])

# --- Clustering --------------------------------------------------------------
# Fit DBSCAN on the scaled coordinates and store each row's label next to its
# coordinates (the lookup tab treats label -1 as "outlier").
db = DBSCAN(eps=0.3, min_samples=5)
db.fit(X_scaled)
df['Cluster'] = db.labels_
# One tab per view: raw data preview, cluster plot, interactive lookup.
tab1, tab2, tab3 = st.tabs(["π Raw Earthquake Data", "πΊοΈ Cluster Visualization", "π Guess Cluster"])

with tab1:
    st.header("π Earthquake Data")
    # Preview only the first ten rows, coordinates plus assigned cluster label.
    preview_columns = ['Latitude', 'Longitude', 'Cluster']
    preview = df.loc[:, preview_columns].head(10)
    st.dataframe(preview)
with tab2:
    st.header("πΊοΈ Earthquake Clusters Plot")
    st.write("Scatter plot showing how earthquakes are grouped based on their locations.")

    # Longitude on x and latitude on y so the plot reads like a map; each point
    # is coloured by its DBSCAN label.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(df['Longitude'], df['Latitude'], c=df['Cluster'], cmap='rainbow', s=10)
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    ax.set_title('Earthquake Location Clusters')
    st.pyplot(fig)

    # Streamlit re-executes this script on every widget interaction, and
    # pyplot keeps every figure it creates alive until it is closed. Close the
    # figure once rendered so figures do not pile up across reruns.
    plt.close(fig)
with tab3:
    st.header("π Which Cluster Is An Earthquake In?")
    st.write("Pick a location to see what cluster it would belong to.")

    # Slider bounds come from the data. The default of 0.0 is clamped into
    # those bounds because st.slider rejects a default outside [min, max].
    lat_min, lat_max = float(df['Latitude'].min()), float(df['Latitude'].max())
    lon_min, lon_max = float(df['Longitude'].min()), float(df['Longitude'].max())
    lat = st.slider("Latitude", lat_min, lat_max, min(max(0.0, lat_min), lat_max))
    lon = st.slider("Longitude", lon_min, lon_max, min(max(0.0, lon_min), lon_max))

    # Scale the selected point with the scaler fitted on the dataset. A
    # DataFrame with the training column names is passed so the feature names
    # match what the scaler was fitted with.
    new_point = scaler.transform(
        pd.DataFrame([[lat, lon]], columns=['Latitude', 'Longitude'])
    )

    # Assign the point using the DBSCAN model that produced the labels shown in
    # the other tabs, instead of refitting with a different eps: refitting can
    # renumber or merge clusters, so its labels need not match the table and
    # plot. A point joins the cluster of its nearest core sample when that
    # core sample lies within eps; otherwise it is reported as an outlier (-1).
    new_cluster = -1
    if len(db.components_) > 0:
        core_dist_sq = ((db.components_ - new_point) ** 2).sum(axis=1)
        nearest = int(core_dist_sq.argmin())
        if core_dist_sq[nearest] <= db.eps ** 2:
            new_cluster = int(db.labels_[db.core_sample_indices_[nearest]])

    if new_cluster == -1:
        st.warning("This point does not belong to any cluster (outlier).")
    else:
        st.success(f"This point likely belongs to **Cluster {new_cluster}**.")
# (end of file)