Spaces:

michaelryt
/

Iris_Flower_Clustering_Using_DBSCAN

Sleeping

Iris_Flower_Clustering_Using_DBSCAN / app.py

Michael Rey

added all files

dd84fb0 2 months ago

2.31 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	from sklearn.preprocessing import StandardScaler
	from sklearn.cluster import DBSCAN

	# Page heading and the one-line description shown under it
	st.title("🌍 Earthquake Location Clustering")
	st.markdown("#### Explore how earthquakes are grouped based on their geographic locations using DBSCAN")

	# Read the CSV and keep only the two coordinate columns,
	# discarding any row where either coordinate is missing
	coordinate_columns = ['Latitude', 'Longitude']
	df = pd.read_csv("database.csv").loc[:, coordinate_columns].dropna()

	# Fit the scaler on the coordinates, then standardize them.
	# `scaler` is kept at module level because later code transforms new points with it.
	scaler = StandardScaler().fit(df[coordinate_columns])
	X_scaled = scaler.transform(df[coordinate_columns])

	# Cluster the standardized coordinates; label -1 marks points DBSCAN left unclustered
	db = DBSCAN(eps=0.3, min_samples=5).fit(X_scaled)
	df['Cluster'] = db.labels_

	# Tabs: raw data preview, cluster scatter plot, and interactive cluster lookup
	tab1, tab2, tab3 = st.tabs(["🌐 Raw Earthquake Data", "🗺️ Cluster Visualization", "📍 Guess Cluster"])

	with tab1:
	    # Show the first 10 rows with their assigned cluster label
	    st.header("🌐 Earthquake Data")
	    st.dataframe(df[['Latitude', 'Longitude', 'Cluster']].head(10))

	with tab2:
	    st.header("🗺️ Earthquake Clusters Plot")
	    st.write("Scatter plot showing how earthquakes are grouped based on their locations.")

	    # Longitude on x, latitude on y, one colour per cluster label
	    fig, ax = plt.subplots(figsize=(10, 6))
	    ax.scatter(df['Longitude'], df['Latitude'], c=df['Cluster'], cmap='rainbow', s=10)
	    ax.set_xlabel('Longitude')
	    ax.set_ylabel('Latitude')
	    ax.set_title('Earthquake Location Clusters')
	    st.pyplot(fig)
	    # Close the figure once rendered so pyplot does not keep it registered
	    # (figures created via plt.subplots stay open until explicitly closed)
	    plt.close(fig)

	with tab3:
	    st.header("📍 Which Cluster Is An Earthquake In?")
	    st.write("Pick a location to see what cluster it would belong to.")

	    # Slider bounds come from the data; the default of 0.0 is clamped into
	    # those bounds so it is always a valid initial value for the slider.
	    lat_min, lat_max = float(df['Latitude'].min()), float(df['Latitude'].max())
	    lon_min, lon_max = float(df['Longitude'].min()), float(df['Longitude'].max())
	    lat = st.slider("Latitude", lat_min, lat_max, min(max(0.0, lat_min), lat_max))
	    lon = st.slider("Longitude", lon_min, lon_max, min(max(0.0, lon_min), lon_max))

	    # Scale the chosen point with the scaler fitted on the dataset. A DataFrame
	    # with the same column names is used because the scaler was fitted on one.
	    new_point = scaler.transform(
	        pd.DataFrame([[lat, lon]], columns=['Latitude', 'Longitude'])
	    )

	    # DBSCAN has no predict(); refitting (especially with a different eps) would
	    # produce labels unrelated to the clusters shown in the other tabs. Instead,
	    # reuse the fitted model: the point joins the cluster of its nearest core
	    # sample if that core sample lies within eps, otherwise it is an outlier.
	    new_cluster = -1
	    core_points = db.components_
	    if len(core_points) > 0:
	        # Euclidean distance matches DBSCAN's default metric used when `db` was built
	        distances = ((core_points - new_point) ** 2).sum(axis=1) ** 0.5
	        nearest = int(distances.argmin())
	        if distances[nearest] <= db.eps:
	            new_cluster = int(db.labels_[db.core_sample_indices_[nearest]])

	    if new_cluster == -1:
	        st.warning("This point does not belong to any cluster (outlier).")
	    else:
	        st.success(f"This point likely belongs to Cluster {new_cluster}.")