File size: 4,176 Bytes
87b2cff
9637476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Import libraries
import gradio as gr
import pandas as pd
import joblib
from math import radians, sin, cos, sqrt, atan2
import folium

# Load the saved feature scaler and clustering model from joblib files.
# The paths are relative, so they resolve against the current working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load artifacts that come from a trusted source.
scaler = joblib.load('scaler.pkl')
kmeans_model = joblib.load('kmeans_model.pkl')

# Load the property dataset. Its 'Latitude' and 'Longitude' columns are read
# later in this file to compute the reference point for distances.
csv_file_path = 'properties.csv'
df = pd.read_csv(csv_file_path)

# Define the haversine function for distance calculation
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance between the two points in kilometers.
    """
    R = 6371  # Radius of the Earth in kilometers
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

# Reference "center" used for distance computation: the mean of the
# 'Latitude' column and the mean of the 'Longitude' column of the dataset
# (a plain arithmetic mean of the coordinates).
centroid_lat = df['Latitude'].mean()
centroid_lon = df['Longitude'].mean()

# Property-type categories used for one-hot encoding. predict_cluster calls
# .index() on this object and uses its length as the width of the one-hot
# vector; it is also passed as the choices of the "Property Type" dropdown.
# NOTE(review): presumably a list of category names in the order used when
# the scaler/model were fitted - confirm against the training code.
property_type_mapping = joblib.load('encoder_categories.pkl')

# Human-readable label for each cluster id; the position in the tuple below
# is the cluster id (0-9).
cluster_descriptions = dict(
    enumerate(
        (
            "Hotel/Others - Away from City Center",
            "Apartments",
            "Medium-large hotels - Close to the city centre",
            "Guest House",
            "Homestay",
            "Resorts",
            "Villa",
            "Apart-hotels",
            "Lodge",
            "Small hotels - Close to the city centre",
        )
    )
)

# Gradio prediction function
def predict_cluster(property_name, star_rating, latitude, longitude, property_type):
    """Predict the cluster of a property and render its location on a map.

    Args:
        property_name: Display name, echoed back in the result message.
        star_rating: Star rating; must be between 0 and 5.
        latitude: Latitude in degrees; must be between -90 and 90.
        longitude: Longitude in degrees; must be between -180 and 180.
        property_type: Category name; must be an entry of
            ``property_type_mapping``.

    Returns:
        A ``(message, map_html)`` tuple, one value per output component of
        the interface. On invalid input or a scaling failure, ``message``
        starts with ``"Error"`` and ``map_html`` is an empty string.
    """
    # Empty numeric fields arrive as None; comparing None with numbers would
    # raise TypeError, so reject them explicitly.
    if star_rating is None or latitude is None or longitude is None:
        return "Error: Star rating, latitude, and longitude are required.", ""

    # Validate inputs. Every error path returns two values because the
    # interface declares two output components.
    if not (-90 <= latitude <= 90) or not (-180 <= longitude <= 180):
        return (
            "Error: Latitude must be between -90 and 90, and longitude must be between -180 and 180.",
            "",
        )
    if not (0 <= star_rating <= 5):
        return "Error: Star rating must be between 0 and 5.", ""

    # Calculate distance from the center
    distance_from_center = haversine(latitude, longitude, centroid_lat, centroid_lon)

    # One-hot encode the property type
    try:
        property_type_index = property_type_mapping.index(property_type)
    except ValueError:
        return f"Error: Property type '{property_type}' is not in the recognized categories.", ""

    # Create a one-hot-encoded vector with the correct length
    encoded_property_type = [0] * len(property_type_mapping)
    encoded_property_type[property_type_index] = 1

    # Feature order: star rating, distance from center, then the one-hot
    # property-type columns.
    feature_vector = [star_rating, distance_from_center] + encoded_property_type

    # Standardize the feature vector
    try:
        scaled_features = scaler.transform([feature_vector])
    except ValueError as e:
        return f"Error during scaling: {e}", ""

    # Predict the cluster
    cluster = kmeans_model.predict(scaled_features)[0]
    description = cluster_descriptions.get(cluster, "Unknown cluster")

    # Create a map centered on the property with a single marker
    m = folium.Map(location=[latitude, longitude], zoom_start=12)
    folium.Marker([latitude, longitude], popup=f"Cluster {cluster}: {description}").add_to(m)
    map_html = m._repr_html_()

    return f"The property '{property_name}' belongs to cluster {cluster}: {description}", map_html

# Build the Gradio UI: five input widgets that feed predict_cluster, and two
# output widgets (a text result and an HTML map).
inputs = [
    gr.Textbox(label="Property Name"),
    gr.Slider(label="Star Rating", minimum=0, maximum=5, step=0.1, value=3.0),
    gr.Number(label="Latitude"),
    gr.Number(label="Longitude"),
    gr.Dropdown(label="Property Type", choices=property_type_mapping),
]

outputs = [
    gr.Textbox(label="Cluster Prediction"),
    gr.HTML(label="Property Location on Map"),
]

# Sample rows in input order: name, star rating, latitude, longitude, type.
examples = [
    list(row)
    for row in (
        ("HOTEL SHREEMAN", 0, 22.719764, 75.861520, "Hotel"),
        ("Playotel Premier Vijay Nagar", 4, 22.756761, 75.903342, "Hotel"),
        ("Papaya Tree", 3, 22.626778, 75.804808, "Hotel"),
        ("ENRISE BY SAYAJI", 4, 22.625697, 75.803255, "Hotel"),
        ("Chokhi Dhani Indore", 3, 22.589608, 75.900815, "Resort"),
    )
]

app = gr.Interface(
    title="Property Clustering Prediction",
    description=(
        "Enter the details of a property to predict its cluster. "
        "The model uses star rating, geographical location, and property type for clustering. "
        "Clusters represent property categories such as hotels, apartments, resorts, etc."
    ),
    fn=predict_cluster,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    theme="default",
)

# Start serving the app
app.launch()