import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN

# Page heading: the app title followed by a short markdown subtitle.
APP_TITLE = "🌍 Earthquake Location Clustering"
APP_SUBTITLE = "#### Explore how earthquakes are grouped based on their geographic locations using DBSCAN"

st.title(APP_TITLE)
st.markdown(APP_SUBTITLE)

# Load dataset, standardize the coordinates and cluster them.
#
# Streamlit re-executes this script from the top on every widget interaction
# (for example each slider move in the "Guess Cluster" tab). Wrapping the
# pipeline in a cached function means the CSV is parsed and DBSCAN is fitted
# once per distinct (csv_path, eps, min_samples) instead of on every rerun.
# NOTE: st.cache_data requires Streamlit >= 1.18. The cache is keyed on the
# arguments only, so edits to the CSV on disk are not picked up until the
# cache is cleared or the app restarts.
@st.cache_data
def _load_and_cluster(csv_path, eps, min_samples):
    """Read the earthquake CSV and cluster its coordinates with DBSCAN.

    Args:
        csv_path: Path of a CSV file with 'Latitude' and 'Longitude' columns.
        eps: DBSCAN neighbourhood radius, measured in standardized units.
        min_samples: Neighbourhood size DBSCAN requires for a core sample.

    Returns:
        A tuple ``(frame, fitted_scaler, scaled, fitted_db)``: the cleaned
        DataFrame with an added 'Cluster' column, the fitted StandardScaler,
        the standardized coordinate array, and the fitted DBSCAN estimator.
    """
    # Clean and filter necessary columns: keep the coordinates only and drop
    # rows where either one is missing.
    frame = pd.read_csv(csv_path)[['Latitude', 'Longitude']].dropna()

    # Standardize data
    fitted_scaler = StandardScaler()
    scaled = fitted_scaler.fit_transform(frame[['Latitude', 'Longitude']])

    # Apply DBSCAN; points that belong to no cluster are labelled -1.
    fitted_db = DBSCAN(eps=eps, min_samples=min_samples)
    frame['Cluster'] = fitted_db.fit_predict(scaled)
    return frame, fitted_scaler, scaled, fitted_db


df, scaler, X_scaled, db = _load_and_cluster("database.csv", 0.3, 5)

# Tabs: one per view of the clustered data. The names tab1..tab3 are used by
# the `with` blocks below.
tab_labels = ["🌐 Raw Earthquake Data", "🗺️ Cluster Visualization", "📍 Guess Cluster"]
tab1, tab2, tab3 = st.tabs(tab_labels)

with tab1:
    st.header("🌐 Earthquake Data")
    # Preview the first ten rows: coordinates next to the assigned cluster label.
    preview_columns = ['Latitude', 'Longitude', 'Cluster']
    st.dataframe(df.loc[:, preview_columns].head(10))

with tab2:
    st.header("🗺️ Earthquake Clusters Plot")
    st.write("Scatter plot showing how earthquakes are grouped based on their locations.")

    # One marker per earthquake, coloured by its DBSCAN cluster label.
    fig, ax = plt.subplots(figsize=(10, 6))
    scatter = ax.scatter(df['Longitude'], df['Latitude'], c=df['Cluster'], cmap='rainbow', s=10)
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    ax.set_title('Earthquake Location Clusters')
    st.pyplot(fig)
    # plt.subplots() registers the figure with pyplot's global figure manager,
    # and Streamlit runs this script again on every widget interaction. Close
    # the figure once it has been rendered so figures do not accumulate in
    # memory across reruns.
    plt.close(fig)

with tab3:
    st.header("📍 Which Cluster Is An Earthquake In?")
    st.write("Pick a location to see what cluster it would belong to.")

    lat_min, lat_max = float(df['Latitude'].min()), float(df['Latitude'].max())
    lon_min, lon_max = float(df['Longitude'].min()), float(df['Longitude'].max())

    # st.slider rejects a default value outside [min, max]; clamp the 0.0
    # default into the data range so the widget works for any dataset extent.
    lat = st.slider("Latitude", lat_min, lat_max, min(max(0.0, lat_min), lat_max))
    lon = st.slider("Longitude", lon_min, lon_max, min(max(0.0, lon_min), lon_max))

    # Scale the chosen point with the already-fitted scaler. A DataFrame with
    # the training column names is passed so the input matches what the scaler
    # was fitted on.
    new_point = scaler.transform(
        pd.DataFrame([[lat, lon]], columns=['Latitude', 'Longitude'])
    )

    # Assign the point using the clustering that the other tabs display (`db`)
    # rather than fitting a second DBSCAN with different parameters: a second
    # fit yields cluster numbers unrelated to the ones shown in the table and
    # plot, and repeats the full clustering on every slider move.
    # The point joins the cluster of its nearest core sample when that core
    # sample lies within eps, and is treated as noise otherwise. This does not
    # model how adding the point could itself change the clustering.
    core_points = db.components_
    new_cluster = -1
    if len(core_points) > 0:
        squared_dist = ((core_points - new_point) ** 2).sum(axis=1)
        nearest = int(squared_dist.argmin())
        if squared_dist[nearest] <= db.eps ** 2:
            # components_[i] is the training row core_sample_indices_[i].
            new_cluster = int(db.labels_[db.core_sample_indices_[nearest]])

    if new_cluster == -1:
        st.warning("This point does not belong to any cluster (outlier).")
    else:
        st.success(f"This point likely belongs to **Cluster {new_cluster}**.")