# A-Celsius's picture
# Update app.py
# 87b2cff verified
#import libraries
import gradio as gr
import pandas as pd
import joblib
from math import radians, sin, cos, sqrt, atan2
import folium
# Restore the persisted artifacts: first the feature scaler, then the
# k-means model (the same order in which they are applied to a feature row).
scaler, kmeans_model = (
    joblib.load(artifact) for artifact in ('scaler.pkl', 'kmeans_model.pkl')
)

# Read the property dataset; its path is kept in a module-level name.
csv_file_path = 'properties.csv'
df = pd.read_csv(csv_file_path)
# Define the haversine function for distance calculation
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The haversine distance in kilometres on a sphere of radius 6371 km.
    """
    R = 6371  # Radius of the Earth in kilometers
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c
# Reference point for the distance feature: the mean latitude and mean
# longitude over every row of the dataset.
centroid_lat, centroid_lon = (
    df[column].mean() for column in ('Latitude', 'Longitude')
)

# Category labels saved from the one-hot encoder; a label's position in this
# sequence is the index that gets set in the encoded vector.
property_type_mapping = joblib.load('encoder_categories.pkl')
# Human-readable label for each cluster id; the position of a label in the
# tuple below is the cluster id it describes (0 through 9).
cluster_descriptions = dict(enumerate((
    "Hotel/Others - Away from City Center",
    "Apartments",
    "Medium-large hotels - Close to the city centre",
    "Guest House",
    "Homestay",
    "Resorts",
    "Villa",
    "Apart-hotels",
    "Lodge",
    "Small hotels - Close to the city centre",
)))
# Gradio prediction function
def predict_cluster(property_name, star_rating, latitude, longitude, property_type):
    """Predict the cluster of a property and render its location on a map.

    Args:
        property_name: Display name, echoed back in the result message.
        star_rating: Star rating; must lie in [0, 5].
        latitude: Latitude in decimal degrees; must lie in [-90, 90].
        longitude: Longitude in decimal degrees; must lie in [-180, 180].
        property_type: Category label; must be present in
            ``property_type_mapping``.

    Returns:
        A ``(message, map_html)`` pair, one value per interface output.
        When the input is rejected, ``message`` starts with ``"Error"`` and
        ``map_html`` is an empty string.
    """
    # The interface declares two outputs, so every return path (including
    # the error paths) must yield two values.
    # An empty numeric field may arrive as None; comparing None with a
    # number would raise TypeError instead of producing a readable message.
    if star_rating is None or latitude is None or longitude is None:
        return "Error: Star rating, latitude and longitude are required.", ""

    # Validate inputs
    if not (-90 <= latitude <= 90) or not (-180 <= longitude <= 180):
        return "Error: Latitude must be between -90 and 90, and longitude must be between -180 and 180.", ""
    if not (0 <= star_rating <= 5):
        return "Error: Star rating must be between 0 and 5.", ""

    # Calculate distance from the center
    distance_from_center = haversine(latitude, longitude, centroid_lat, centroid_lon)

    # One-hot encode the property type
    try:
        property_type_index = property_type_mapping.index(property_type)
    except ValueError:
        return f"Error: Property type '{property_type}' is not in the recognized categories.", ""

    # Create a one-hot-encoded vector with the correct length
    encoded_property_type = [0] * len(property_type_mapping)
    encoded_property_type[property_type_index] = 1

    # Feature order: star rating, distance from the centroid, one-hot type
    feature_vector = [star_rating, distance_from_center] + encoded_property_type

    # Standardize the feature vector
    try:
        scaled_features = scaler.transform([feature_vector])
    except ValueError as e:
        return f"Error during scaling: {e}", ""

    # Predict the cluster
    cluster = kmeans_model.predict(scaled_features)[0]
    description = cluster_descriptions.get(cluster, "Unknown cluster")

    # Create a map centred on the property, with a marker describing it
    m = folium.Map(location=[latitude, longitude], zoom_start=12)
    folium.Marker([latitude, longitude], popup=f"Cluster {cluster}: {description}").add_to(m)
    map_html = m._repr_html_()

    return f"The property '{property_name}' belongs to cluster {cluster}: {description}", map_html
# Input widgets, in the positional order of predict_cluster's parameters.
inputs = [
    gr.Textbox(label="Property Name"),
    gr.Slider(label="Star Rating", minimum=0, maximum=5, step=0.1, value=3.0),
    gr.Number(label="Latitude"),
    gr.Number(label="Longitude"),
    gr.Dropdown(label="Property Type", choices=property_type_mapping),
]

# Output widgets: the textual prediction and the rendered map.
outputs = [
    gr.Textbox(label="Cluster Prediction"),
    gr.HTML(label="Property Location on Map"),
]

# Sample rows; columns follow `inputs` (name, stars, latitude, longitude, type).
examples = [
    ["HOTEL SHREEMAN", 0, 22.719764, 75.861520, "Hotel"],
    ["Playotel Premier Vijay Nagar", 4, 22.756761, 75.903342, "Hotel"],
    ["Papaya Tree", 3, 22.626778, 75.804808, "Hotel"],
    ["ENRISE BY SAYAJI", 4, 22.625697, 75.803255, "Hotel"],
    ["Chokhi Dhani Indore", 3, 22.589608, 75.900815, "Resort"],
]

# Wire the prediction function to the widgets defined above.
app = gr.Interface(
    title="Property Clustering Prediction",
    description=" ".join((
        "Enter the details of a property to predict its cluster.",
        "The model uses star rating, geographical location, and property type for clustering.",
        "Clusters represent property categories such as hotels, apartments, resorts, etc.",
    )),
    fn=predict_cluster,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    theme="default",
)

# Start serving the app.
app.launch()