A-Celsius committed on
Commit
9637476
·
verified ·
1 Parent(s): da8708b

Upload 4 files

Browse files
Files changed (4) hide show
  1. encoder_categories.pkl +3 -0
  2. interface.py +123 -0
  3. kmeans_model.pkl +3 -0
  4. scaler.pkl +3 -0
encoder_categories.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5153392d11e8ec872d73c6b732298af6c8384d120d4a4d6cb6c05cc14cd4b3a0
3
+ size 100
interface.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import joblib
4
+ from math import radians, sin, cos, sqrt, atan2
5
+ import folium
6
+
7
# Deserialize the saved scaler and KMeans model with joblib.
scaler = joblib.load("scaler.pkl")
kmeans_model = joblib.load("kmeans_model.pkl")

# Read the property dataset from CSV into a DataFrame.
csv_file_path = "properties.csv"
df = pd.read_csv(csv_file_path)
14
+
15
+ # Define the haversine function for distance calculation
16
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The haversine distance in kilometers, using an Earth radius of 6371 km.
    """
    R = 6371  # Radius of the Earth in kilometers
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` a hair outside [0, 1] for
    # (near-)antipodal points; sqrt(1 - a) would then raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c
23
+
24
# Reference point for distance computation: the mean latitude/longitude of
# every row in the dataset.
centroid_lat, centroid_lon = df["Latitude"].mean(), df["Longitude"].mean()

# Property-type categories used for one-hot encoding, loaded with joblib.
property_type_mapping = joblib.load("encoder_categories.pkl")
30
+
31
# Human-readable label for each cluster id returned by the KMeans model.
cluster_descriptions = {
    0: "Hotel/Others - Away from City Center",
    1: "Apartments",
    2: "Medium-large hotels - Close to the city centre",
    3: "Guest House",
    4: "Homestay",
    5: "Resorts",
    6: "Villa",
    7: "Apart-hotels",
    8: "Lodge",
    9: "Small hotels - Close to the city centre",
}
44
+
45
+ # Gradio prediction function
46
def predict_cluster(property_name, star_rating, latitude, longitude, property_type):
    """Predict the cluster of a property and render its location on a map.

    Args:
        property_name: Display name of the property (used only in the message).
        star_rating: Star rating; must lie in [0, 5].
        latitude: Latitude in degrees; must lie in [-90, 90].
        longitude: Longitude in degrees; must lie in [-180, 180].
        property_type: One of the categories in ``property_type_mapping``.

    Returns:
        A ``(message, map_html)`` pair, one value per declared Gradio output.
        On success ``message`` names the predicted cluster and ``map_html`` is
        the folium map markup. On any validation or scaling failure
        ``message`` starts with "Error" and ``map_html`` is an empty string.
    """
    # An empty numeric field is delivered as None (per the Gradio Number
    # docs); comparing None with numbers would raise TypeError, so reject it.
    if star_rating is None or latitude is None or longitude is None:
        return "Error: Star rating, latitude, and longitude are required.", ""

    # Validate inputs. Every error path returns two values because the
    # interface declares two outputs.
    if not (-90 <= latitude <= 90) or not (-180 <= longitude <= 180):
        return "Error: Latitude must be between -90 and 90, and longitude must be between -180 and 180.", ""
    if not (0 <= star_rating <= 5):
        return "Error: Star rating must be between 0 and 5.", ""

    # Calculate distance from the dataset centroid
    distance_from_center = haversine(latitude, longitude, centroid_lat, centroid_lon)

    # One-hot encode the property type
    try:
        property_type_index = property_type_mapping.index(property_type)
    except ValueError:
        return f"Error: Property type '{property_type}' is not in the recognized categories.", ""

    # Create a one-hot-encoded vector with the correct length
    encoded_property_type = [0] * len(property_type_mapping)
    encoded_property_type[property_type_index] = 1

    # Prepare the feature vector: rating, distance, then the one-hot columns
    feature_vector = [star_rating, distance_from_center] + encoded_property_type

    # Standardize the feature vector
    try:
        scaled_features = scaler.transform([feature_vector])
    except ValueError as e:
        return f"Error during scaling: {e}", ""

    # Predict the cluster; cast to a plain int so the dictionary lookup and
    # the formatted message do not depend on the model's label dtype.
    cluster = int(kmeans_model.predict(scaled_features)[0])
    description = cluster_descriptions.get(cluster, "Unknown cluster")

    # Create a map centered on the property with a single marker
    m = folium.Map(location=[latitude, longitude], zoom_start=12)
    folium.Marker([latitude, longitude], popup=f"Cluster {cluster}: {description}").add_to(m)
    map_html = m._repr_html_()

    return f"The property '{property_name}' belongs to cluster {cluster}: {description}", map_html
85
+
86
# Input widgets, in the same order as predict_cluster's parameters.
inputs = [
    gr.Textbox(label="Property Name"),
    gr.Slider(minimum=0, maximum=5, step=0.1, label="Star Rating", value=3.0),
    gr.Number(label="Latitude"),
    gr.Number(label="Longitude"),
    gr.Dropdown(choices=property_type_mapping, label="Property Type"),
]

# Output widgets: the prediction text followed by the rendered map.
outputs = [
    gr.Textbox(label="Cluster Prediction"),
    gr.HTML(label="Property Location on Map"),
]

# Sample rows offered in the UI: name, star rating, latitude, longitude, type.
examples = [
    ["HOTEL SHREEMAN", 0, 22.719764, 75.861520, "Hotel"],
    ["Playotel Premier Vijay Nagar", 4, 22.756761, 75.903342, "Hotel"],
    ["Papaya Tree", 3, 22.626778, 75.804808, "Hotel"],
    ["ENRISE BY SAYAJI", 4, 22.625697, 75.803255, "Hotel"],
    ["Chokhi Dhani Indore", 3, 22.589608, 75.900815, "Resort"],
]

# Assemble the Gradio interface around predict_cluster.
app = gr.Interface(
    fn=predict_cluster,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title="Property Clustering Prediction",
    description=(
        "Enter the details of a property to predict its cluster. "
        "The model uses star rating, geographical location, and property type for clustering. "
        "Clusters represent property categories such as hotels, apartments, resorts, etc."
    ),
    theme="default",
)

# Run the Gradio app
app.launch()
kmeans_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:401803ee19647ea5c6938548623432575172b1b7ec659de0994d09a83513a0ac
3
+ size 3391
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da6ebf8518bc3e8333d4c931a029cf06b979a6960cdefbcd29c1b870540c09db
3
+ size 855