"""Gradio app: assign a property to a pre-trained KMeans cluster and map it.

The script loads a fitted scaler, a fitted KMeans model and the list of
property-type categories from disk, computes the dataset centroid from
``properties.csv`` and exposes a small web form that predicts the cluster
for a single property.
"""

# Import libraries
from math import atan2, cos, radians, sin, sqrt

import folium
import gradio as gr
import joblib
import pandas as pd

# Load saved scaler and model.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts that
# were produced by a trusted training run.
scaler = joblib.load('scaler.pkl')
kmeans_model = joblib.load('kmeans_model.pkl')

# Load the dataset
csv_file_path = 'properties.csv'
df = pd.read_csv(csv_file_path)


# Define the haversine function for distance calculation
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The haversine distance in kilometers, using an Earth radius of
        6371 km.
    """
    R = 6371  # Radius of the Earth in kilometers
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = (
        sin(dlat / 2) ** 2
        + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c


# Calculate the centroid for distance computation
centroid_lat = df['Latitude'].mean()
centroid_lon = df['Longitude'].mean()

# OneHotEncoder category mapping
# NOTE(review): used below via .index() and as Dropdown choices, so this is
# presumably a flat list of category names -- confirm against the training
# code that wrote the file.
property_type_mapping = joblib.load('encoder_categories.pkl')

# Cluster descriptions
cluster_descriptions = {
    0: "Hotel/Others - Away from City Center",
    1: "Apartments",
    2: "Medium-large hotels - Close to the city centre",
    3: "Guest House",
    4: "Homestay",
    5: "Resorts",
    6: "Villa",
    7: "Apart-hotels",
    8: "Lodge",
    9: "Small hotels - Close to the city centre",
}


def _error_result(message):
    """Return an (error message, empty map HTML) pair for the two outputs."""
    return message, ""


# Gradio prediction function
def predict_cluster(property_name, star_rating, latitude, longitude, property_type):
    """Predict the cluster of a property and render its location on a map.

    Args:
        property_name: Display name of the property; only used in the
            returned message.
        star_rating: Star rating, expected in the range 0..5.
        latitude: Latitude in degrees, expected in the range -90..90.
        longitude: Longitude in degrees, expected in the range -180..180.
        property_type: One of the entries of ``property_type_mapping``.

    Returns:
        A ``(message, map_html)`` tuple matching the interface's two output
        components. On invalid input ``message`` starts with ``"Error"`` and
        ``map_html`` is an empty string.
    """
    # An empty gr.Number field is delivered as None; comparing None with a
    # number would raise TypeError instead of showing a readable message.
    if latitude is None or longitude is None:
        return _error_result("Error: Latitude and longitude are required.")

    # Validate inputs
    if not (-90 <= latitude <= 90) or not (-180 <= longitude <= 180):
        return _error_result(
            "Error: Latitude must be between -90 and 90, and longitude must be between -180 and 180."
        )
    if star_rating is None or not (0 <= star_rating <= 5):
        return _error_result("Error: Star rating must be between 0 and 5.")

    # Calculate distance from the center
    distance_from_center = haversine(latitude, longitude, centroid_lat, centroid_lon)

    # One-hot encode the property type
    try:
        property_type_index = property_type_mapping.index(property_type)
    except ValueError:
        return _error_result(
            f"Error: Property type '{property_type}' is not in the recognized categories."
        )

    # Create a one-hot-encoded vector with the correct length
    encoded_property_type = [0] * len(property_type_mapping)
    encoded_property_type[property_type_index] = 1

    # Prepare the feature vector
    # NOTE(review): assumes the scaler was fitted on columns in this order
    # (star rating, distance, one-hot property type) -- confirm.
    feature_vector = [star_rating, distance_from_center] + encoded_property_type

    # Standardize the feature vector
    try:
        scaled_features = scaler.transform([feature_vector])
    except ValueError as e:
        return _error_result(f"Error during scaling: {e}")

    # Predict the cluster; cast to a plain int so the dict lookup and the
    # formatted text do not depend on the array's scalar type.
    cluster = int(kmeans_model.predict(scaled_features)[0])
    description = cluster_descriptions.get(cluster, "Unknown cluster")

    # Create a map
    m = folium.Map(location=[latitude, longitude], zoom_start=12)
    folium.Marker(
        [latitude, longitude],
        popup=f"Cluster {cluster}: {description}",
    ).add_to(m)
    map_html = m._repr_html_()

    return (
        f"The property '{property_name}' belongs to cluster {cluster}: {description}",
        map_html,
    )


# Create the Gradio interface
inputs = [
    gr.Textbox(label="Property Name"),
    gr.Slider(minimum=0, maximum=5, step=0.1, label="Star Rating", value=3.0),
    gr.Number(label="Latitude"),
    gr.Number(label="Longitude"),
    gr.Dropdown(choices=property_type_mapping, label="Property Type"),
]

outputs = [
    gr.Textbox(label="Cluster Prediction"),
    gr.HTML(label="Property Location on Map"),
]

examples = [
    ["HOTEL SHREEMAN", 0, 22.719764, 75.861520, "Hotel"],
    ["Playotel Premier Vijay Nagar", 4, 22.756761, 75.903342, "Hotel"],
    ["Papaya Tree", 3, 22.626778, 75.804808, "Hotel"],
    ["ENRISE BY SAYAJI", 4, 22.625697, 75.803255, "Hotel"],
    ["Chokhi Dhani Indore", 3, 22.589608, 75.900815, "Resort"],
]

app = gr.Interface(
    fn=predict_cluster,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title="Property Clustering Prediction",
    description=(
        "Enter the details of a property to predict its cluster. "
        "The model uses star rating, geographical location, and property type for clustering. "
        "Clusters represent property categories such as hotels, apartments, resorts, etc."
    ),
    theme="default",
)

# Run the Gradio app
if __name__ == "__main__":
    app.launch()