Upload 4 files
Browse files- encoder_categories.pkl +3 -0
- interface.py +123 -0
- kmeans_model.pkl +3 -0
- scaler.pkl +3 -0
encoder_categories.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5153392d11e8ec872d73c6b732298af6c8384d120d4a4d6cb6c05cc14cd4b3a0
|
3 |
+
size 100
|
interface.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import joblib
|
4 |
+
from math import radians, sin, cos, sqrt, atan2
|
5 |
+
import folium
|
6 |
+
|
7 |
+
# Restore the persisted scaler and clustering model from their pickle files
scaler, kmeans_model = (
    joblib.load(artifact) for artifact in ('scaler.pkl', 'kmeans_model.pkl')
)

# Read the property dataset; its Latitude/Longitude columns are averaged
# further down to obtain the reference centroid
csv_file_path = 'properties.csv'
df = pd.read_csv(csv_file_path)
|
14 |
+
|
15 |
+
# Define the haversine function for distance calculation
|
16 |
+
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along the sphere's surface, in kilometers.
    """
    R = 6371  # Radius of the Earth in kilometers
    phi1 = radians(lat1)
    phi2 = radians(lat2)
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(phi1) * cos(phi2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c
|
23 |
+
|
24 |
+
# Reference point for the distance feature: the mean coordinate of all dataset rows
centroid_lat, centroid_lon = (
    df[column].mean() for column in ('Latitude', 'Longitude')
)

# Category list saved from the OneHotEncoder; the position of a category in
# this sequence is the position of its 1 in the one-hot vector
property_type_mapping = joblib.load('encoder_categories.pkl')
|
30 |
+
|
31 |
+
# Human-readable label for each cluster index (0-9), looked up with the
# index returned by the clustering model
cluster_descriptions = dict(enumerate([
    "Hotel/Others - Away from City Center",
    "Apartments",
    "Medium-large hotels - Close to the city centre",
    "Guest House",
    "Homestay",
    "Resorts",
    "Villa",
    "Apart-hotels",
    "Lodge",
    "Small hotels - Close to the city centre",
]))
|
44 |
+
|
45 |
+
# Gradio prediction function
|
46 |
+
def predict_cluster(property_name, star_rating, latitude, longitude, property_type):
    """Predict the cluster of one property and render its location on a map.

    Parameters
    ----------
    property_name : str
        Display name; only echoed back in the result message.
    star_rating : float
        Must lie in [0, 5].
    latitude, longitude : float
        Decimal degrees; latitude in [-90, 90], longitude in [-180, 180].
    property_type : str
        Must be one of the entries of ``property_type_mapping``.

    Returns
    -------
    tuple[str, str]
        ``(message, map_html)``. On a validation or scaling failure the
        message starts with ``"Error"`` and ``map_html`` is an empty string,
        so each of the two output components always receives a value.
    """
    def _failure(message):
        # The interface declares two outputs (text + HTML); returning a bare
        # string on error paths would leave the second output without a value.
        return message, ""

    # Numeric fields left empty may arrive as None; comparing None with a
    # number raises TypeError, so reject it explicitly.
    if star_rating is None or latitude is None or longitude is None:
        return _failure("Error: Star rating, latitude and longitude are required.")

    # Validate inputs
    if not (-90 <= latitude <= 90) or not (-180 <= longitude <= 180):
        return _failure(
            "Error: Latitude must be between -90 and 90, and longitude must be between -180 and 180."
        )
    if not (0 <= star_rating <= 5):
        return _failure("Error: Star rating must be between 0 and 5.")

    # Distance (km) from the dataset centroid is one of the model features
    distance_from_center = haversine(latitude, longitude, centroid_lat, centroid_lon)

    # Locate the category's slot in the one-hot vector
    try:
        property_type_index = property_type_mapping.index(property_type)
    except ValueError:
        return _failure(
            f"Error: Property type '{property_type}' is not in the recognized categories."
        )

    # One-hot vector with exactly one slot per known category
    encoded_property_type = [0] * len(property_type_mapping)
    encoded_property_type[property_type_index] = 1

    # Feature order: star rating, distance, then the one-hot block
    feature_vector = [star_rating, distance_from_center] + encoded_property_type

    # Standardize the feature vector (the scaler expects a 2-D input, hence the wrapping list)
    try:
        scaled_features = scaler.transform([feature_vector])
    except ValueError as e:
        return _failure(f"Error during scaling: {e}")

    # Predict the cluster; convert to a plain int for the dict lookup and message
    cluster = int(kmeans_model.predict(scaled_features)[0])
    description = cluster_descriptions.get(cluster, "Unknown cluster")

    # Build a map centred on the property with a marker describing the cluster
    m = folium.Map(location=[latitude, longitude], zoom_start=12)
    folium.Marker([latitude, longitude], popup=f"Cluster {cluster}: {description}").add_to(m)
    map_html = m._repr_html_()

    return f"The property '{property_name}' belongs to cluster {cluster}: {description}", map_html
|
85 |
+
|
86 |
+
# Input widgets, listed in the same order as predict_cluster's parameters
inputs = [
    gr.Textbox(label="Property Name"),
    gr.Slider(label="Star Rating", minimum=0, maximum=5, step=0.1, value=3.0),
    *(gr.Number(label=axis) for axis in ("Latitude", "Longitude")),
    gr.Dropdown(label="Property Type", choices=property_type_mapping),
]

# Output widgets: the prediction message first, then the rendered map HTML
outputs = [
    gr.Textbox(label="Cluster Prediction"),
    gr.HTML(label="Property Location on Map"),
]
|
99 |
+
|
100 |
+
# Sample rows offered in the UI: name, star rating, latitude, longitude, property type
examples = [
    [name, stars, lat, lon, kind]
    for name, stars, lat, lon, kind in (
        ("HOTEL SHREEMAN", 0, 22.719764, 75.861520, "Hotel"),
        ("Playotel Premier Vijay Nagar", 4, 22.756761, 75.903342, "Hotel"),
        ("Papaya Tree", 3, 22.626778, 75.804808, "Hotel"),
        ("ENRISE BY SAYAJI", 4, 22.625697, 75.803255, "Hotel"),
        ("Chokhi Dhani Indore", 3, 22.589608, 75.900815, "Resort"),
    )
]
|
107 |
+
|
108 |
+
# Wire the prediction function to the input/output widgets and example rows
app = gr.Interface(
    fn=predict_cluster,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title="Property Clustering Prediction",
    description=(
        "Enter the details of a property to predict its cluster. "
        "The model uses star rating, geographical location, and property type for clustering. "
        "Clusters represent property categories such as hotels, apartments, resorts, etc."
    ),
    theme="default",
)

# Start the server only when this file is executed as a script, so that
# importing the module does not launch (and block on) a running web server.
if __name__ == "__main__":
    app.launch()
|
kmeans_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:401803ee19647ea5c6938548623432575172b1b7ec659de0994d09a83513a0ac
|
3 |
+
size 3391
|
scaler.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da6ebf8518bc3e8333d4c931a029cf06b979a6960cdefbcd29c1b870540c09db
|
3 |
+
size 855
|