|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import joblib |
|
import matplotlib.pyplot as plt |
|
import os |
|
import openai |
|
from sklearn.preprocessing import LabelEncoder |
|
import requests |
|
from io import BytesIO |
|
import gdown |
|
|
|
|
|
st.set_page_config( |
|
page_title="The Guide", |
|
page_icon="π", |
|
layout="wide", |
|
initial_sidebar_state="expanded" |
|
) |
|
|
|
|
|
|
|
st.markdown(""" |
|
<style> |
|
/* Base styles */ |
|
* { |
|
color: black !important; |
|
} |
|
|
|
/* Streamlit specific input elements */ |
|
.stSelectbox, |
|
.stNumberInput, |
|
.stTextInput { |
|
color: black !important; |
|
} |
|
|
|
/* Dropdown and select elements */ |
|
select option, |
|
.streamlit-selectbox option, |
|
.stSelectbox > div[data-baseweb="select"] > div, |
|
.stSelectbox > div > div > div { |
|
color: black !important; |
|
background-color: white !important; |
|
} |
|
|
|
/* Input fields */ |
|
input, |
|
.stNumberInput > div > div > input { |
|
color: black !important; |
|
} |
|
|
|
/* Text elements */ |
|
div.row-widget.stSelectbox > div, |
|
div.row-widget.stSelectbox > div > div > div, |
|
.streamlit-expanderContent, |
|
.stMarkdown, |
|
p, span, label { |
|
color: black !important; |
|
} |
|
|
|
/* Keep button text white */ |
|
.stButton > button { |
|
color: white !important; |
|
background-color: #FF4B4B; |
|
} |
|
|
|
/* Specific styling for select boxes */ |
|
div[data-baseweb="select"] { |
|
color: black !important; |
|
background-color: white !important; |
|
} |
|
|
|
div[data-baseweb="select"] * { |
|
color: black !important; |
|
} |
|
|
|
/* Style for the selected option */ |
|
div[data-baseweb="select"] > div:first-child { |
|
color: black !important; |
|
background-color: white !important; |
|
} |
|
|
|
/* Dropdown menu items */ |
|
[role="listbox"] { |
|
background-color: white !important; |
|
} |
|
|
|
[role="listbox"] [role="option"] { |
|
color: black !important; |
|
} |
|
|
|
/* Number input specific styling */ |
|
input[type="number"] { |
|
color: black !important; |
|
background-color: white !important; |
|
} |
|
|
|
.stNumberInput div[data-baseweb="input"] { |
|
background-color: white !important; |
|
} |
|
|
|
/* Headers */ |
|
h1, h2, h3, h4, h5, h6 { |
|
color: black !important; |
|
} |
|
</style> |
|
""", unsafe_allow_html=True) |
|
|
|
|
|
def create_brand_categories(): |
|
return { |
|
'luxury_brands': { |
|
'rolls-royce': (300000, 600000), |
|
'bentley': (200000, 500000), |
|
'lamborghini': (250000, 550000), |
|
'ferrari': (250000, 600000), |
|
'mclaren': (200000, 500000), |
|
'aston-martin': (150000, 400000), |
|
'maserati': (100000, 300000) |
|
}, |
|
'premium_brands': { |
|
'porsche': (60000, 150000), |
|
'bmw': (40000, 90000), |
|
'mercedes-benz': (45000, 95000), |
|
'audi': (35000, 85000), |
|
'lexus': (40000, 80000), |
|
'jaguar': (45000, 90000), |
|
'land-rover': (40000, 90000), |
|
'volvo': (35000, 75000), |
|
'infiniti': (35000, 70000), |
|
'cadillac': (40000, 85000), |
|
'tesla': (40000, 100000) |
|
}, |
|
'mid_tier_brands': { |
|
'acura': (30000, 50000), |
|
'lincoln': (35000, 65000), |
|
'buick': (25000, 45000), |
|
'chrysler': (25000, 45000), |
|
'alfa-romeo': (35000, 60000), |
|
'genesis': (35000, 60000) |
|
}, |
|
'standard_brands': { |
|
'toyota': (20000, 35000), |
|
'honda': (20000, 35000), |
|
'volkswagen': (20000, 35000), |
|
'mazda': (20000, 32000), |
|
'subaru': (22000, 35000), |
|
'hyundai': (18000, 32000), |
|
'kia': (17000, 30000), |
|
'ford': (20000, 40000), |
|
'chevrolet': (20000, 38000), |
|
'gmc': (25000, 45000), |
|
'jeep': (25000, 45000), |
|
'dodge': (22000, 40000), |
|
'ram': (25000, 45000), |
|
'nissan': (18000, 32000) |
|
}, |
|
'economy_brands': { |
|
'mitsubishi': (15000, 25000), |
|
'suzuki': (12000, 22000), |
|
'fiat': (15000, 25000), |
|
'mini': (20000, 35000), |
|
'smart': (15000, 25000) |
|
}, |
|
'discontinued_brands': { |
|
'pontiac': (5000, 15000), |
|
'saturn': (4000, 12000), |
|
'mercury': (4000, 12000), |
|
'oldsmobile': (3000, 10000), |
|
'plymouth': (3000, 10000), |
|
'saab': (5000, 15000) |
|
} |
|
} |
|
|
|
@st.cache_resource |
|
def download_file_from_google_drive(file_id): |
|
"""Downloads a file from Google Drive using gdown.""" |
|
url = f"https://drive.google.com/uc?id={file_id}" |
|
try: |
|
with st.spinner('Downloading from Google Drive...'): |
|
output = f"temp_{file_id}.pkl" |
|
gdown.download(url, output, quiet=False) |
|
|
|
with open(output, 'rb') as f: |
|
content = f.read() |
|
|
|
|
|
os.remove(output) |
|
return content |
|
|
|
except Exception as e: |
|
st.error(f"Error downloading from Google Drive: {str(e)}") |
|
raise e |
|
|
|
@st.cache_data |
|
def load_datasets(): |
|
"""Load the dataset from Google Drive.""" |
|
dataset_file_id = "1emG-BQ3-x4xsMAGMEznkh1ACdlAj5Dn1" |
|
|
|
try: |
|
with st.spinner('Loading dataset...'): |
|
content = download_file_from_google_drive(dataset_file_id) |
|
|
|
original_data = pd.read_csv(BytesIO(content), low_memory=False) |
|
|
|
|
|
original_data.columns = original_data.columns.str.strip().str.capitalize() |
|
return original_data |
|
except Exception as e: |
|
st.error(f"Error loading dataset: {str(e)}") |
|
raise e |
|
|
|
@st.cache_resource |
|
def load_model_and_encodings(): |
|
"""Load model from Google Drive and create encodings.""" |
|
model_file_id = "1wKixkdW2pVKEpJW-N1QIyKUr2nYirU7I" |
|
|
|
try: |
|
|
|
with st.spinner('Loading model...'): |
|
model_content = download_file_from_google_drive(model_file_id) |
|
model = joblib.load(BytesIO(model_content)) |
|
|
|
|
|
original_data = load_datasets() |
|
|
|
|
|
label_encoders = {} |
|
categorical_features = ['Make', 'model', 'condition', 'fuel', 'title_status', |
|
'transmission', 'drive', 'size', 'type', 'paint_color'] |
|
|
|
for feature in categorical_features: |
|
if feature in original_data.columns: |
|
le = LabelEncoder() |
|
unique_values = original_data[feature].fillna('unknown').str.strip().unique() |
|
le.fit(unique_values) |
|
label_encoders[feature.lower()] = le |
|
|
|
return model, label_encoders |
|
except Exception as e: |
|
st.error(f"Error loading model: {str(e)}") |
|
raise e |
|
|
|
|
|
|
|
try: |
|
original_data = load_datasets() |
|
model, label_encoders = load_model_and_encodings() |
|
except Exception as e: |
|
st.error(f"Error loading data or models: {str(e)}") |
|
st.stop() |
|
|
|
|
|
|
|
|
|
numeric_features = ['year', 'odometer', 'age', 'age_squared', 'mileage_per_year'] |
|
|
|
categorical_features = ['make', 'model', 'condition', 'fuel', 'title_status', |
|
'transmission', 'drive', 'size', 'type', 'paint_color'] |
|
required_features = numeric_features + categorical_features |
|
|
|
|
|
def create_features(df): |
|
df = df.copy() |
|
current_year = 2024 |
|
df['age'] = current_year - df['year'] |
|
df['age_squared'] = df['age'] ** 2 |
|
df['mileage_per_year'] = np.clip(df['odometer'] / (df['age'] + 1), 0, 200000) |
|
return df |
|
|
|
def prepare_input(input_dict, label_encoders): |
|
|
|
input_dict = {k: v if v is not None else 'unknown' for k, v in input_dict.items()} |
|
|
|
|
|
input_df = pd.DataFrame([input_dict]) |
|
|
|
|
|
feature_name_mapping = { |
|
"make": "Make", |
|
"model": "Model", |
|
"condition": "Condition", |
|
"fuel": "Fuel", |
|
"title_status": "Title_status", |
|
"transmission": "Transmission", |
|
"drive": "Drive", |
|
"size": "Size", |
|
"type": "Type", |
|
"paint_color": "Paint_color", |
|
"year": "Year", |
|
"odometer": "Odometer", |
|
"age": "Age", |
|
"age_squared": "Age_squared", |
|
"mileage_per_year": "Mileage_per_year" |
|
} |
|
input_df.rename(columns=feature_name_mapping, inplace=True) |
|
|
|
|
|
input_df["Year"] = pd.to_numeric(input_df.get("Year", 0), errors="coerce") |
|
input_df["Odometer"] = pd.to_numeric(input_df.get("Odometer", 0), errors="coerce") |
|
|
|
|
|
current_year = 2024 |
|
input_df["Age"] = current_year - input_df["Year"] |
|
input_df["Age_squared"] = input_df["Age"] ** 2 |
|
input_df["Mileage_per_year"] = input_df["Odometer"] / (input_df["Age"] + 1) |
|
input_df["Mileage_per_year"] = input_df["Mileage_per_year"].clip(0, 200000) |
|
|
|
|
|
for feature, encoded_feature in feature_name_mapping.items(): |
|
if feature in label_encoders: |
|
input_df[encoded_feature] = input_df[encoded_feature].fillna("unknown").astype(str).str.strip() |
|
try: |
|
input_df[encoded_feature] = label_encoders[feature].transform(input_df[encoded_feature]) |
|
except ValueError: |
|
input_df[encoded_feature] = 0 |
|
|
|
|
|
for feature in model.feature_names_in_: |
|
if feature not in input_df: |
|
input_df[feature] = 0 |
|
|
|
|
|
input_df = input_df[model.feature_names_in_] |
|
|
|
return input_df |
|
|
|
|
|
|
|
|
|
st.markdown(""" |
|
<style> |
|
/* Force black text globally */ |
|
.stApp, .stApp * { |
|
color: black !important; |
|
} |
|
|
|
/* Specific overrides for different elements */ |
|
.main { |
|
padding: 0rem 1rem; |
|
} |
|
|
|
.stButton>button { |
|
width: 100%; |
|
background-color: #FF4B4B; |
|
color: white !important; /* Keep button text white */ |
|
border-radius: 5px; |
|
padding: 0.5rem 1rem; |
|
border: none; |
|
} |
|
|
|
.stButton>button:hover { |
|
background-color: #FF6B6B; |
|
} |
|
|
|
.sidebar .sidebar-content { |
|
background-color: #f5f5f5; |
|
} |
|
|
|
/* Input fields and selectboxes */ |
|
.stSelectbox select, |
|
.stSelectbox option, |
|
.stSelectbox div, |
|
.stNumberInput input, |
|
.stTextInput input { |
|
color: black !important; |
|
} |
|
|
|
/* Headers */ |
|
h1, h2, h3, h4, h5, h6 { |
|
color: black !important; |
|
} |
|
|
|
/* Labels and text */ |
|
label, .stText, p, span { |
|
color: black !important; |
|
} |
|
|
|
/* Selectbox options */ |
|
option { |
|
color: black !important; |
|
background-color: white !important; |
|
} |
|
|
|
/* Override for any Streamlit specific classes */ |
|
.st-emotion-cache-16idsys p, |
|
.st-emotion-cache-1wmy9hl p, |
|
.st-emotion-cache-16idsys span, |
|
.st-emotion-cache-1wmy9hl span { |
|
color: black !important; |
|
} |
|
|
|
/* Force white text only for the prediction button */ |
|
.stButton>button[data-testid="stButton"] { |
|
color: white !important; |
|
} |
|
</style> |
|
""", unsafe_allow_html=True) |
|
|
|
def style_metric_container(label, value): |
|
st.markdown(f""" |
|
<div style=" |
|
background-color: #f8f9fa; |
|
padding: 1rem; |
|
border-radius: 5px; |
|
margin: 0.5rem 0; |
|
border-left: 5px solid #FF4B4B; |
|
"> |
|
<p style="color: #666; margin-bottom: 0.2rem; font-size: 0.9rem;">{label}</p> |
|
<p style="color: #1E1E1E; font-size: 1.5rem; font-weight: 600; margin: 0;">{value}</p> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
|
|
def generate_gpt_response(prompt): |
|
|
|
|
|
openai.api_key = "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" |
|
|
|
|
|
system_message = { |
|
"role": "system", |
|
"content": ( |
|
"You are a helpful car shopping assistant. " |
|
"Provide car recommendations based on user queries. " |
|
"Include car makes, models, years, and approximate prices. " |
|
"Be friendly and informative." |
|
) |
|
} |
|
|
|
messages = [system_message, {"role": "user", "content": prompt}] |
|
|
|
|
|
response = openai.ChatCompletion.create( |
|
model="gpt-3.5-turbo", |
|
messages=messages, |
|
max_tokens=500, |
|
n=1, |
|
stop=None, |
|
temperature=0.7, |
|
) |
|
|
|
|
|
assistant_reply = response['choices'][0]['message']['content'].strip() |
|
|
|
return assistant_reply |
|
|
|
def create_assistant_section(): |
|
st.markdown(""" |
|
<div style='background-color: #f8f9fa; padding: 1.5rem; border-radius: 10px; margin-bottom: 1rem;'> |
|
<h2 style='color: #1E1E1E; margin-top: 0;'>π€ Car Shopping Assistant</h2> |
|
<p style='color: #666;'>Ask me anything about cars! For example: 'What's a good car under $30,000 with low mileage?'</p> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
if "assistant_responses" not in st.session_state: |
|
st.session_state.assistant_responses = [] |
|
|
|
prompt = st.text_input("Ask about car recommendations...", |
|
placeholder="Type your question here...") |
|
|
|
if prompt: |
|
try: |
|
|
|
response = generate_gpt_response(prompt) |
|
st.session_state.assistant_responses.append(response) |
|
except Exception as e: |
|
response = f"Sorry, I encountered an error: {str(e)}" |
|
st.session_state.assistant_responses.append(response) |
|
|
|
|
|
st.write(response) |
|
|
|
|
|
if len(st.session_state.assistant_responses) > 1: |
|
st.markdown("### Previous Responses") |
|
for prev_response in st.session_state.assistant_responses[:-1]: |
|
st.markdown("---") |
|
st.write(prev_response) |
|
|
|
if st.button("Clear Chat"): |
|
st.session_state.assistant_responses = [] |
|
st.experimental_rerun() |
|
|
|
|
|
def create_prediction_interface(): |
|
with st.sidebar: |
|
st.markdown(""" |
|
<div style='background-color: #FF4B4B; padding: 1rem; border-radius: 5px; margin-bottom: 2rem;'> |
|
<h2 style='color: white; margin: 0;'>Car Details</h2> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
|
|
year = st.slider("Year", min_value=1980, max_value=2024, value=2022) |
|
|
|
|
|
make_options = sorted(original_data['Make'].dropna().unique()) |
|
make = st.selectbox("Make", options=make_options) |
|
|
|
|
|
filtered_models = sorted(original_data[original_data['Make'] == make]['Model'].dropna().unique()) |
|
model_name = st.selectbox("Model", options=filtered_models if len(filtered_models) > 0 else ["No models available"]) |
|
|
|
if model_name == "No models available": |
|
st.warning("No models are available for the selected make.") |
|
|
|
|
|
condition = st.selectbox("Condition", ['new', 'like new', 'excellent', 'good', 'fair', 'salvage', 'parts only']) |
|
fuel = st.selectbox("Fuel Type", sorted(original_data['Fuel'].fillna('Unknown').unique())) |
|
odometer = st.number_input("Odometer (miles)", min_value=0, value=20000, format="%d", step=1000) |
|
title_status = st.selectbox("Title Status", sorted(original_data['Title_status'].fillna('Unknown').unique())) |
|
transmission = st.selectbox("Transmission", sorted(original_data['Transmission'].fillna('Unknown').unique())) |
|
drive = st.selectbox("Drive Type", sorted(original_data['Drive'].fillna('Unknown').unique())) |
|
size = st.selectbox("Size", sorted(original_data['Size'].fillna('Unknown').unique())) |
|
paint_color = st.selectbox("Paint Color", sorted(original_data['Paint_color'].fillna('Unknown').unique())) |
|
|
|
car_type = 'sedan' |
|
|
|
|
|
predict_button = st.button("π Predict Price", use_container_width=True) |
|
|
|
return { |
|
'year': year, |
|
'make': make.strip(), |
|
'model': model_name if model_name != "No models available" else 'unknown', |
|
'condition': condition.lower().strip(), |
|
'fuel': fuel.lower().strip(), |
|
'odometer': odometer, |
|
'title_status': title_status.lower().strip(), |
|
'transmission': transmission.lower().strip(), |
|
'drive': drive.lower().strip(), |
|
'size': size.lower().strip(), |
|
'type': car_type.lower().strip(), |
|
'paint_color': paint_color.lower().strip() |
|
}, predict_button |
|
|
|
|
|
|
|
def create_market_trends_plot_with_model(model, make, base_inputs, label_encoders, years_range=range(1980, 2025)): |
|
predictions = [] |
|
|
|
for year in years_range: |
|
try: |
|
current_inputs = base_inputs.copy() |
|
current_inputs['year'] = float(year) |
|
age = 2024 - year |
|
|
|
|
|
base_price = 30000 |
|
|
|
|
|
if age <= 1: |
|
value_factor = 0.85 |
|
elif age <= 5: |
|
value_factor = 0.85 * (0.90 ** (age - 1)) |
|
else: |
|
value_factor = 0.85 * (0.90 ** 4) * (0.95 ** (age - 5)) |
|
|
|
price = base_price * value_factor |
|
predictions.append({"year": year, "predicted_price": max(price, 2000)}) |
|
|
|
except Exception as e: |
|
continue |
|
|
|
if not predictions: |
|
return None |
|
|
|
predictions_df = pd.DataFrame(predictions) |
|
fig, ax = plt.subplots(figsize=(12, 6)) |
|
ax.plot(predictions_df["year"], predictions_df["predicted_price"], color="#FF4B4B", linewidth=2) |
|
ax.set_title(f"Average Car Value by Age") |
|
ax.set_xlabel("Year") |
|
ax.set_ylabel("Value ($)") |
|
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'${x:,.0f}')) |
|
plt.grid(True, alpha=0.3) |
|
|
|
return fig |
|
|
|
def inspect_model_features(model): |
|
|
|
try: |
|
if hasattr(model, "feature_names_in_"): |
|
print("Model feature names:", model.feature_names_in_) |
|
else: |
|
print("Model does not have 'feature_names_in_' attribute.") |
|
except Exception as e: |
|
print(f"Error inspecting model features: {e}") |
|
|
|
def predict_with_ranges(inputs, model, label_encoders): |
|
input_df = prepare_input(inputs, label_encoders) |
|
base_prediction = float(np.expm1(model.predict(input_df)[0])) |
|
|
|
brand_categories = create_brand_categories() |
|
make = inputs['make'].lower() |
|
year = inputs['year'] |
|
condition = inputs['condition'] |
|
odometer = inputs['odometer'] |
|
age = 2024 - year |
|
|
|
|
|
price_range = None |
|
for category, brands in brand_categories.items(): |
|
if make in brands: |
|
price_range = brands[make] |
|
break |
|
if not price_range: |
|
price_range = (15000, 35000) |
|
|
|
|
|
mileage_factor = max(1 - (odometer / 200000) * 0.3, 0.7) |
|
age_factor = 0.85 ** min(age, 15) |
|
condition_factor = { |
|
'new': 1.0, |
|
'like new': 0.9, |
|
'excellent': 0.8, |
|
'good': 0.7, |
|
'fair': 0.5, |
|
'salvage': 0.3 |
|
}.get(condition, 0.7) |
|
|
|
|
|
min_price = price_range[0] * mileage_factor * age_factor * condition_factor |
|
max_price = price_range[1] * mileage_factor * age_factor * condition_factor |
|
predicted_price = base_prediction * mileage_factor * age_factor * condition_factor |
|
|
|
|
|
final_prediction = np.random.uniform(min_price, max_price) |
|
|
|
return { |
|
'predicted_price': final_prediction, |
|
'min_price': min_price, |
|
'max_price': max_price |
|
} |
|
|
|
def main(model, label_encoders): |
|
col1, col2 = st.columns([2, 1]) |
|
|
|
with col1: |
|
st.markdown(""" |
|
<h1 style='text-align: center;'>The Guide π</h1> |
|
<p style='text-align: center; color: #666; font-size: 1.1rem; margin-bottom: 2rem;'> |
|
A cutting-edge data science project leveraging machine learning to detect which car would be best for you. |
|
</p> |
|
""", unsafe_allow_html=True) |
|
|
|
inputs, predict_button = create_prediction_interface() |
|
|
|
|
|
base_inputs = { |
|
"year": inputs.get("year", 2022), |
|
"make": inputs.get("make", "toyota").lower(), |
|
"model": inputs.get("model", "camry"), |
|
"odometer": inputs.get("odometer", 20000), |
|
"condition": inputs.get("condition", "good"), |
|
"fuel": inputs.get("fuel", "gas"), |
|
"title_status": inputs.get("title_status", "clean"), |
|
"transmission": inputs.get("transmission", "automatic"), |
|
"drive": inputs.get("drive", "fwd"), |
|
"size": inputs.get("size", "mid-size"), |
|
"paint_color": inputs.get("paint_color", "black"), |
|
"type": inputs.get("type", "sedan") |
|
} |
|
|
|
if base_inputs["condition"] == "new": |
|
base_inputs["odometer"] = 0 |
|
|
|
if predict_button: |
|
st.write(f"Analyzing {base_inputs['year']} {base_inputs['make'].title()} {base_inputs['model'].title()}...") |
|
prediction_results = predict_with_ranges(base_inputs, model, label_encoders) |
|
|
|
st.markdown(f""" |
|
### Price Analysis |
|
- **Estimated Range**: ${prediction_results['min_price']:,.2f} - ${prediction_results['max_price']:,.2f} |
|
- **Model Prediction**: ${prediction_results['predicted_price']:,.2f} |
|
|
|
*Note: Range based on market data, condition, and mileage* |
|
""") |
|
|
|
|
|
fig = create_market_trends_plot_with_model(model, base_inputs["make"], base_inputs, label_encoders) |
|
if fig: |
|
st.pyplot(fig) |
|
else: |
|
st.warning("No graph generated. Please check your data or selection.") |
|
|
|
with col2: |
|
create_assistant_section() |
|
|
|
if __name__ == "__main__": |
|
try: |
|
|
|
original_data = load_datasets() |
|
model, label_encoders = load_model_and_encodings() |
|
|
|
|
|
inspect_model_features(model) |
|
|
|
|
|
main(model, label_encoders) |
|
except Exception as e: |
|
st.error(f"Error loading data or models: {str(e)}") |
|
st.stop() |
|
|