# Streamlit app "The Guide": loads a dataset and a trained model from Google
# Drive, encodes user-selected vehicle attributes, and displays a price
# analysis.
#
# NOTE(review): this file was recovered from a copy in which line breaks were
# collapsed and HTML/CSS inside string templates was stripped. Several helpers
# that are still called below (create_prediction_interface,
# predict_with_ranges, create_market_trends_plot_with_model,
# create_assistant_section, inspect_model_features) are not defined in the
# visible text and must be restored from the original source.
import os
from io import BytesIO

import gdown
import joblib
import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import requests
import streamlit as st
from sklearn.preprocessing import LabelEncoder

# --- Set page configuration ---
st.set_page_config(
    page_title="The Guide",
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)

# --- Custom CSS for better styling ---
# NOTE(review): the stylesheet body is blank in this copy (markup appears to
# have been stripped) -- restore it from the original source.
st.markdown(""" """, unsafe_allow_html=True)


# --- Cache functions ---
def create_brand_categories():
    """Return the static brand lookup table.

    Returns:
        dict: category name -> {brand name -> 2-tuple of ints}. The tuples are
        presumably (low, high) price bounds -- TODO confirm meaning and units
        against the code that consumes them.
    """
    return {
        'luxury_brands': {
            'rolls-royce': (300000, 600000),
            'bentley': (200000, 500000),
            'lamborghini': (250000, 550000),
            'ferrari': (250000, 600000),
            'mclaren': (200000, 500000),
            'aston-martin': (150000, 400000),
            'maserati': (100000, 300000),
        },
        'premium_brands': {
            'porsche': (60000, 150000),
            'bmw': (40000, 90000),
            'mercedes-benz': (45000, 95000),
            'audi': (35000, 85000),
            'lexus': (40000, 80000),
            'jaguar': (45000, 90000),
            'land-rover': (40000, 90000),
            'volvo': (35000, 75000),
            'infiniti': (35000, 70000),
            'cadillac': (40000, 85000),
            'tesla': (40000, 100000),
        },
        'mid_tier_brands': {
            'acura': (30000, 50000),
            'lincoln': (35000, 65000),
            'buick': (25000, 45000),
            'chrysler': (25000, 45000),
            'alfa-romeo': (35000, 60000),
            'genesis': (35000, 60000),
        },
        'standard_brands': {
            'toyota': (20000, 35000),
            'honda': (20000, 35000),
            'volkswagen': (20000, 35000),
            'mazda': (20000, 32000),
            'subaru': (22000, 35000),
            'hyundai': (18000, 32000),
            'kia': (17000, 30000),
            'ford': (20000, 40000),
            'chevrolet': (20000, 38000),
            'gmc': (25000, 45000),
            'jeep': (25000, 45000),
            'dodge': (22000, 40000),
            'ram': (25000, 45000),
            'nissan': (18000, 32000),
        },
        'economy_brands': {
            'mitsubishi': (15000, 25000),
            'suzuki': (12000, 22000),
            'fiat': (15000, 25000),
            'mini': (20000, 35000),
            'smart': (15000, 25000),
        },
        'discontinued_brands': {
            'pontiac': (5000, 15000),
            'saturn': (4000, 12000),
            'mercury': (4000, 12000),
            'oldsmobile': (3000, 10000),
            'plymouth': (3000, 10000),
            'saab': (5000, 15000),
        },
    }


@st.cache_resource
def download_file_from_google_drive(file_id):
    """Download a file from Google Drive with gdown and return its bytes.

    The file is written to ``temp_<file_id>.pkl`` in the working directory,
    read back, and the temporary file is removed again.

    Args:
        file_id: Google Drive file identifier, inserted into the download URL.

    Returns:
        bytes: the raw content of the downloaded file.

    Raises:
        Exception: any download or read error is reported via ``st.error``
            and then re-raised.
    """
    url = f"https://drive.google.com/uc?id={file_id}"
    output = f"temp_{file_id}.pkl"
    try:
        with st.spinner('Downloading from Google Drive...'):
            try:
                gdown.download(url, output, quiet=False)
                with open(output, 'rb') as f:
                    return f.read()
            finally:
                # Clean up the temporary file even when the read fails, so a
                # partial download is not left behind.
                if os.path.exists(output):
                    os.remove(output)
    except Exception as e:
        st.error(f"Error downloading from Google Drive: {str(e)}")
        raise


@st.cache_data
def load_datasets():
    """Load the dataset CSV from Google Drive.

    Column names are stripped and passed through ``str.capitalize()``, so
    every column has an upper-case first letter and is otherwise lower-case
    (e.g. ``Make``, ``Title_status``).

    Returns:
        pandas.DataFrame: the dataset with normalised column names.

    Raises:
        Exception: any download or parse error is reported via ``st.error``
            and then re-raised.
    """
    dataset_file_id = "1emG-BQ3-x4xsMAGMEznkh1ACdlAj5Dn1"
    try:
        with st.spinner('Loading dataset...'):
            content = download_file_from_google_drive(dataset_file_id)
            # Use BytesIO to read the CSV content
            original_data = pd.read_csv(BytesIO(content), low_memory=False)
            # Ensure column names match the model's expectations
            original_data.columns = (
                original_data.columns.str.strip().str.capitalize()
            )
            return original_data
    except Exception as e:
        st.error(f"Error loading dataset: {str(e)}")
        raise


@st.cache_resource
def load_model_and_encodings():
    """Load the model from Google Drive and fit label encoders from the data.

    One ``LabelEncoder`` is fitted per categorical feature whose column is
    present in the dataset. Columns are looked up under the capitalised names
    produced by ``load_datasets``; the returned dict is keyed by the
    lower-case feature name, which is what ``prepare_input`` looks up.

    Returns:
        tuple: ``(model, label_encoders)`` where ``label_encoders`` maps a
        lower-case feature name to its fitted ``LabelEncoder``.

    Raises:
        Exception: any load error is reported via ``st.error`` and re-raised.
    """
    model_file_id = "1wKixkdW2pVKEpJW-N1QIyKUr2nYirU7I"
    categorical = (
        'make', 'model', 'condition', 'fuel', 'title_status',
        'transmission', 'drive', 'size', 'type', 'paint_color',
    )
    try:
        # Show loading message
        with st.spinner('Loading model...'):
            model_content = download_file_from_google_drive(model_file_id)
            # NOTE(security): joblib.load unpickles arbitrary objects; this is
            # only acceptable because the file id above is hard-coded.
            model = joblib.load(BytesIO(model_content))

            # Load data for encodings
            original_data = load_datasets()

            # Create fresh encoders from data
            label_encoders = {}
            for feature in categorical:
                # load_datasets() capitalises every column name, so the
                # lower-case feature name itself never matches a column.
                column = feature.capitalize()
                if column not in original_data.columns:
                    continue
                values = (
                    original_data[column]
                    .fillna('unknown')
                    .astype(str)
                    .str.strip()
                )
                encoder = LabelEncoder()
                encoder.fit(values.unique())
                label_encoders[feature] = encoder

            return model, label_encoders
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        raise


# --- Load data and models ---
try:
    original_data = load_datasets()
    model, label_encoders = load_model_and_encodings()
except Exception as e:
    st.error(f"Error loading data or models: {str(e)}")
    st.stop()

# --- Define features ---
# From model.py
numeric_features = ['year', 'odometer', 'age', 'age_squared', 'mileage_per_year']
# Categorical feature names in lower case
categorical_features = ['make', 'model', 'condition', 'fuel', 'title_status',
                        'transmission', 'drive', 'size', 'type', 'paint_color']
required_features = numeric_features + categorical_features

# Fixed reference year used to derive vehicle age.
# NOTE(review): presumably this must match the year used when the model was
# trained -- confirm against model.py before changing it.
CURRENT_YEAR = 2024


# --- Feature engineering functions ---
def create_features(df):
    """Return a copy of ``df`` with ``age``, ``age_squared`` and
    ``mileage_per_year`` columns added.

    Args:
        df: DataFrame with numeric ``year`` and ``odometer`` columns.

    Returns:
        pandas.DataFrame: a copy of ``df`` with the derived columns;
        ``mileage_per_year`` is clipped to the range [0, 200000].
    """
    df = df.copy()
    df['age'] = CURRENT_YEAR - df['year']
    df['age_squared'] = df['age'] ** 2
    # +1 avoids division by zero for vehicles from the reference year.
    df['mileage_per_year'] = np.clip(df['odometer'] / (df['age'] + 1), 0, 200000)
    return df


def prepare_input(input_dict, label_encoders):
    """Turn one set of user inputs into a single-row model input frame.

    Steps: replace ``None`` values with ``'unknown'``, rename keys to the
    capitalised column names, coerce ``Year``/``Odometer`` to numbers, derive
    the age features, label-encode every categorical column for which an
    encoder exists, add any column the model expects but that is missing
    (filled with 0), and order the columns as ``model.feature_names_in_``.

    Reads the module-level ``model`` for its ``feature_names_in_`` attribute.

    Args:
        input_dict: mapping of lower-case feature name to raw value.
        label_encoders: mapping of lower-case feature name to a fitted
            ``LabelEncoder``.

    Returns:
        pandas.DataFrame: one row, columns ordered as the model expects.
    """
    # Convert None values to 'unknown' for safe handling
    input_dict = {k: v if v is not None else 'unknown'
                  for k, v in input_dict.items()}

    # Convert input dictionary to DataFrame
    input_df = pd.DataFrame([input_dict])

    # Ensure columns match the model's expected casing
    feature_name_mapping = {
        "make": "Make",
        "model": "Model",
        "condition": "Condition",
        "fuel": "Fuel",
        "title_status": "Title_status",
        "transmission": "Transmission",
        "drive": "Drive",
        "size": "Size",
        "type": "Type",
        "paint_color": "Paint_color",
        "year": "Year",
        "odometer": "Odometer",
        "age": "Age",
        "age_squared": "Age_squared",
        "mileage_per_year": "Mileage_per_year",
    }
    input_df.rename(columns=feature_name_mapping, inplace=True)

    # Numeric feature conversions (non-numeric values become NaN)
    input_df["Year"] = pd.to_numeric(input_df.get("Year", 0), errors="coerce")
    input_df["Odometer"] = pd.to_numeric(input_df.get("Odometer", 0), errors="coerce")

    # Feature engineering (same formulas as create_features)
    input_df["Age"] = CURRENT_YEAR - input_df["Year"]
    input_df["Age_squared"] = input_df["Age"] ** 2
    input_df["Mileage_per_year"] = input_df["Odometer"] / (input_df["Age"] + 1)
    input_df["Mileage_per_year"] = input_df["Mileage_per_year"].clip(0, 200000)

    # Encode categorical features
    for feature, encoded_feature in feature_name_mapping.items():
        if feature not in label_encoders:
            continue
        if encoded_feature not in input_df:
            # The caller omitted this feature; treat it like a missing value
            # instead of failing with a KeyError.
            input_df[encoded_feature] = "unknown"
        input_df[encoded_feature] = (
            input_df[encoded_feature].fillna("unknown").astype(str).str.strip()
        )
        try:
            input_df[encoded_feature] = label_encoders[feature].transform(
                input_df[encoded_feature]
            )
        except ValueError:
            input_df[encoded_feature] = 0  # Assign default for unseen values

    # Ensure all required features are present
    for feature in model.feature_names_in_:
        if feature not in input_df:
            input_df[feature] = 0  # Default value for missing features

    # Reorder columns
    input_df = input_df[model.feature_names_in_]

    return input_df


# --- Styling functions ---
# NOTE(review): the stylesheet body is blank in this copy (markup appears to
# have been stripped) -- restore it from the original source.
st.markdown(""" """, unsafe_allow_html=True)


def style_metric_container(label, value):
    """Render ``label`` and ``value`` through ``st.markdown`` with
    ``unsafe_allow_html=True``.

    Args:
        label: text interpolated on the first template line.
        value: text interpolated on the second template line.
    """
    # NOTE(review): the HTML markup of this template appears to have been
    # stripped, and the two sentences after {value} look like text from other
    # functions that were lost from this copy (an assistant prompt and an
    # "about" blurb). The template is kept exactly as found -- restore it from
    # the original source.
    st.markdown(f"""
{label}
{value}
Ask me anything about cars! For example: 'What's a good car under $30,000 with low mileage?'
A cutting-edge data science project leveraging machine learning to detect which car would be best for you.
""", unsafe_allow_html=True)


def main(model, label_encoders):
    """Collect user inputs and, when requested, show the price analysis.

    NOTE(review): this ``def`` line is reconstructed. The statements below
    were found without an enclosing header, but they use ``model`` and
    ``label_encoders`` and the ``__main__`` guard calls
    ``main(model, label_encoders)``. The beginning of the original body --
    including whatever defines ``col2`` -- is missing from this copy and must
    be restored.
    """
    inputs, predict_button = create_prediction_interface()

    # Prepare base inputs
    base_inputs = {
        "year": inputs.get("year", 2022),
        "make": inputs.get("make", "toyota").lower(),
        "model": inputs.get("model", "camry"),
        "odometer": inputs.get("odometer", 20000),
        "condition": inputs.get("condition", "good"),
        "fuel": inputs.get("fuel", "gas"),
        "title_status": inputs.get("title_status", "clean"),
        "transmission": inputs.get("transmission", "automatic"),
        "drive": inputs.get("drive", "fwd"),
        "size": inputs.get("size", "mid-size"),
        "paint_color": inputs.get("paint_color", "black"),
        "type": inputs.get("type", "sedan"),
    }

    # A vehicle in "new" condition is analysed with zero mileage.
    if base_inputs["condition"] == "new":
        base_inputs["odometer"] = 0

    if predict_button:
        st.write(f"Analyzing {base_inputs['year']} {base_inputs['make'].title()} {base_inputs['model'].title()}...")
        prediction_results = predict_with_ranges(base_inputs, model, label_encoders)

        # NOTE(review): the line breaks of this markdown template appear to
        # have been collapsed to spaces in this copy; the text is kept exactly
        # as found. Restore the multi-line layout from the original source so
        # the heading and bullet list render correctly.
        st.markdown(
            f" ### Price Analysis"
            f" - **Estimated Range**: ${prediction_results['min_price']:,.2f}"
            f" - ${prediction_results['max_price']:,.2f}"
            f" - **Model Prediction**: ${prediction_results['predicted_price']:,.2f}"
            f" *Note: Range based on market data, condition, and mileage* "
        )

        # Generate and display the graph
        fig = create_market_trends_plot_with_model(
            model, base_inputs["make"], base_inputs, label_encoders
        )
        if fig:
            st.pyplot(fig)
        else:
            st.warning("No graph generated. Please check your data or selection.")

    # NOTE(review): `col2` is not defined in the visible code (see docstring).
    with col2:
        create_assistant_section()


if __name__ == "__main__":
    try:
        # Load data and model
        original_data = load_datasets()
        model, label_encoders = load_model_and_encodings()

        # Inspect model features
        inspect_model_features(model)

        # Call the main function
        main(model, label_encoders)
    except Exception as e:
        st.error(f"Error loading data or models: {str(e)}")
        st.stop()