Michael Rey committed on
Commit
dd84fb0
·
1 Parent(s): a486ddb

added all files

Browse files
Files changed (4) hide show
  1. README.md +2 -2
  2. app.py +65 -0
  3. database.csv +0 -0
  4. requirements.txt +5 -0
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Iris Flower Clustering Using DBSCAN
3
- emoji: 🌍
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: streamlit
 
1
  ---
2
+ title: Earthquake Location Clustering Using DBSCAN
3
+ emoji: 🌐
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: streamlit
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ from sklearn.preprocessing import StandardScaler
5
+ from sklearn.cluster import DBSCAN
6
+
7
+ # Title
8
+ st.title("🌍 Earthquake Location Clustering")
9
+ st.markdown("#### Explore how earthquakes are grouped based on their geographic locations using DBSCAN")
10
+
11
+ # Load dataset
12
+ df = pd.read_csv("database.csv")
13
+
14
+ # Clean and filter necessary columns
15
+ df = df[['Latitude', 'Longitude']].dropna()
16
+
17
+ # Standardize data
18
+ scaler = StandardScaler()
19
+ X_scaled = scaler.fit_transform(df[['Latitude', 'Longitude']])
20
+
21
+ # Apply DBSCAN
22
+ db = DBSCAN(eps=0.3, min_samples=5)
23
+ df['Cluster'] = db.fit_predict(X_scaled)
24
+
25
+ # Tabs
26
+ tab1, tab2, tab3 = st.tabs(["🌐 Raw Earthquake Data", "🗺️ Cluster Visualization", "📍 Guess Cluster"])
27
+
28
+ with tab1:
29
+ st.header("🌐 Earthquake Data")
30
+ st.dataframe(df[['Latitude', 'Longitude', 'Cluster']].head(10))
31
+
32
+ with tab2:
33
+ st.header("🗺️ Earthquake Clusters Plot")
34
+ st.write("Scatter plot showing how earthquakes are grouped based on their locations.")
35
+
36
+ fig, ax = plt.subplots(figsize=(10, 6))
37
+ scatter = ax.scatter(df['Longitude'], df['Latitude'], c=df['Cluster'], cmap='rainbow', s=10)
38
+ ax.set_xlabel('Longitude')
39
+ ax.set_ylabel('Latitude')
40
+ ax.set_title('Earthquake Location Clusters')
41
+ st.pyplot(fig)
42
+
43
+ with tab3:
44
+ st.header("📍 Which Cluster Is An Earthquake In?")
45
+ st.write("Pick a location to see what cluster it would belong to.")
46
+
47
+ lat = st.slider("Latitude", float(df['Latitude'].min()), float(df['Latitude'].max()), 0.0)
48
+ lon = st.slider("Longitude", float(df['Longitude'].min()), float(df['Longitude'].max()), 0.0)
49
+
50
+ # Scaling the new input data point
51
+ new_point = scaler.transform([[lat, lon]])
52
+
53
+ # Apply DBSCAN to the whole dataset including the new point
54
+ updated_data = pd.DataFrame(df[['Latitude', 'Longitude']].values.tolist() + [[lat, lon]], columns=['Latitude', 'Longitude'])
55
+ updated_scaled = scaler.transform(updated_data)
56
+
57
+ db_updated = DBSCAN(eps=0.5, min_samples=5)
58
+ clusters = db_updated.fit_predict(updated_scaled)
59
+
60
+ new_cluster = clusters[-1]
61
+
62
+ if new_cluster == -1:
63
+ st.warning("This point does not belong to any cluster (outlier).")
64
+ else:
65
+ st.success(f"This point likely belongs to **Cluster {new_cluster}**.")
database.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ matplotlib
4
+ scikit-learn
5
+ numpy