"""Streamlit app: cluster earthquake locations with DBSCAN and explore them.

The script loads ``database.csv``, standardizes the ``Latitude`` and
``Longitude`` columns, clusters them with DBSCAN, and renders three tabs:
the labelled data, a scatter plot of the clusters, and an interactive
lookup that reports which of the displayed clusters a chosen location
falls into.
"""

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN

DATA_PATH = "database.csv"
FEATURES = ['Latitude', 'Longitude']

# One set of DBSCAN parameters for the whole app, so that the cluster ID
# reported in the lookup tab refers to the same clustering that is shown
# in the data table and the plot.
DBSCAN_EPS = 0.3
DBSCAN_MIN_SAMPLES = 5


def _clamp(value: float, low: float, high: float) -> float:
    """Return *value* limited to the closed interval [low, high]."""
    return min(max(value, low), high)


def _assign_cluster(point, core_points, core_labels, eps: float) -> int:
    """Return the label of the fitted cluster that *point* falls into.

    DBSCAN has no ``predict``; a new observation joins a cluster when it
    lies within ``eps`` of one of that cluster's core samples. The label of
    the nearest core sample is returned, or -1 (noise) when no core sample
    is within ``eps``.

    Args:
        point: Scaled coordinates, shape (1, n_features).
        core_points: Scaled core samples, shape (n_core, n_features).
        core_labels: Cluster label of each row in *core_points*.
        eps: Neighborhood radius used when the model was fitted.
    """
    if len(core_points) == 0:
        # Every sample was classified as noise, so there is nothing to join.
        return -1
    distances = ((core_points - point) ** 2).sum(axis=1) ** 0.5
    nearest = distances.argmin()
    if distances[nearest] > eps:
        return -1
    return int(core_labels[nearest])


# Title
st.title("🌍 Earthquake Location Clustering")
st.markdown("#### Explore how earthquakes are grouped based on their geographic locations using DBSCAN")

# Load dataset
df = pd.read_csv(DATA_PATH)

# Clean and filter necessary columns
df = df[FEATURES].dropna()

# Standardize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[FEATURES])

# Apply DBSCAN
db = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES)
df['Cluster'] = db.fit_predict(X_scaled)

# Tabs
# NOTE(review): the emoji in the second and third labels were mis-encoded in
# the original source. The map emoji decodes unambiguously; the pin emoji had
# lost a byte and is the most plausible reconstruction - confirm.
tab1, tab2, tab3 = st.tabs(["🌐 Raw Earthquake Data", "🗺️ Cluster Visualization", "📍 Guess Cluster"])

with tab1:
    st.header("🌐 Earthquake Data")
    st.dataframe(df[['Latitude', 'Longitude', 'Cluster']].head(10))

with tab2:
    st.header("🗺️ Earthquake Clusters Plot")
    st.write("Scatter plot showing how earthquakes are grouped based on their locations.")

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(df['Longitude'], df['Latitude'], c=df['Cluster'], cmap='rainbow', s=10)
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    ax.set_title('Earthquake Location Clusters')
    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so pyplot's
    # global registry does not accumulate one figure per rerun.
    plt.close(fig)

with tab3:
    st.header("📍 Which Cluster Is An Earthquake In?")
    st.write("Pick a location to see what cluster it would belong to.")

    lat_min, lat_max = float(df['Latitude'].min()), float(df['Latitude'].max())
    lon_min, lon_max = float(df['Longitude'].min()), float(df['Longitude'].max())

    # The slider's initial value must lie inside [min, max]; a hard-coded 0.0
    # fails for datasets whose range does not include zero.
    lat = st.slider("Latitude", lat_min, lat_max, _clamp(0.0, lat_min, lat_max))
    lon = st.slider("Longitude", lon_min, lon_max, _clamp(0.0, lon_min, lon_max))

    # Scale the new point with the already-fitted scaler. A DataFrame with the
    # training column names avoids scikit-learn's feature-name warning.
    new_point = scaler.transform(pd.DataFrame([[lat, lon]], columns=FEATURES))

    # Assign the point to one of the clusters fitted above instead of
    # re-running DBSCAN: a re-fit is slow and can renumber or merge clusters,
    # making the reported ID disagree with the table and the plot.
    new_cluster = _assign_cluster(
        new_point,
        db.components_,
        db.labels_[db.core_sample_indices_],
        DBSCAN_EPS,
    )

    if new_cluster == -1:
        st.warning("This point does not belong to any cluster (outlier).")
    else:
        st.success(f"This point likely belongs to **Cluster {new_cluster}**.")