Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,9 +11,6 @@ from sklearn.preprocessing import LabelEncoder
|
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
|
13 |
import torch
|
14 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
15 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
16 |
-
import re
|
17 |
from datetime import datetime
|
18 |
|
19 |
# Dataset loading function with caching
|
@@ -22,7 +19,7 @@ def load_datasets():
|
|
22 |
try:
|
23 |
with st.spinner('Loading dataset...'):
|
24 |
original_data = pd.read_csv('CTP_Model1.csv', low_memory=False)
|
25 |
-
original_data.columns = original_data.columns.str.strip().str.
|
26 |
return original_data
|
27 |
except Exception as e:
|
28 |
st.error(f"Error loading dataset: {str(e)}")
|
@@ -62,7 +59,7 @@ def classify_image(image):
|
|
62 |
|
63 |
def find_closest_match(df, brand, model):
|
64 |
# Combine brand and model names from the dataset
|
65 |
-
df['full_name'] = df['
|
66 |
|
67 |
# Create a list of all car names
|
68 |
car_names = df['full_name'].tolist()
|
@@ -85,7 +82,7 @@ def find_closest_match(df, brand, model):
|
|
85 |
return df.iloc[most_similar_index]
|
86 |
|
87 |
def get_car_overview(car_data):
|
88 |
-
prompt = f"Provide an overview of the following car:\nYear: {car_data['
|
89 |
response = openai.ChatCompletion.create(
|
90 |
model="gpt-3.5-turbo",
|
91 |
messages=[{"role": "user", "content": prompt}]
|
@@ -123,30 +120,31 @@ def predict_price(model, encoders, categorical_features, user_input):
|
|
123 |
current_year = datetime.now().year
|
124 |
|
125 |
for feature, value in user_input.items():
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
129 |
# If it's a categorical feature but not in encoders, set to 0 (unknown)
|
130 |
-
encoded_features[
|
131 |
else:
|
132 |
# For numerical features, use the value as is
|
133 |
-
encoded_features[
|
134 |
|
135 |
# Calculate additional features
|
136 |
-
encoded_features['
|
137 |
-
encoded_features['
|
138 |
|
139 |
# Assume average mileage per year (you may want to adjust this)
|
140 |
avg_mileage_per_year = 12000
|
141 |
-
encoded_features['
|
142 |
|
143 |
# Assume odometer reading (you may want to adjust this)
|
144 |
-
encoded_features['
|
145 |
|
146 |
input_data = pd.DataFrame([encoded_features])
|
147 |
|
148 |
# Ensure all expected columns are present
|
149 |
-
expected_columns = ['
|
150 |
for col in expected_columns:
|
151 |
if col not in input_data.columns:
|
152 |
input_data[col] = 0 # or some default value
|
@@ -204,10 +202,10 @@ if image is not None:
|
|
204 |
match = find_closest_match(df, brand, model_name)
|
205 |
if match is not None:
|
206 |
st.write("Closest Match Found:")
|
207 |
-
st.write(f"Make: {match['
|
208 |
-
st.write(f"Model: {match['
|
209 |
-
st.write(f"Year: {match['
|
210 |
-
st.write(f"Price: ${match['
|
211 |
|
212 |
# Get additional information using GPT-3.5-turbo
|
213 |
overview = get_car_overview(match)
|
@@ -224,17 +222,17 @@ if image is not None:
|
|
224 |
|
225 |
for year in years:
|
226 |
user_input = {
|
227 |
-
'make': match['
|
228 |
-
'model': match['
|
229 |
'year': year,
|
230 |
-
'condition': match.get('
|
231 |
-
'fuel': match.get('
|
232 |
-
'title_status': match.get('
|
233 |
-
'transmission': match.get('
|
234 |
-
'drive': match.get('
|
235 |
-
'size': match.get('
|
236 |
-
'type': match.get('
|
237 |
-
'paint_color': match.get('
|
238 |
}
|
239 |
|
240 |
price = predict_price(model, label_encoders, categorical_features, user_input)
|
@@ -243,7 +241,7 @@ if image is not None:
|
|
243 |
# Plotting the results
|
244 |
plt.figure(figsize=(10, 5))
|
245 |
plt.plot(years, predicted_prices, marker='o')
|
246 |
-
plt.title(f"Predicted Price of {match['
|
247 |
plt.xlabel("Year")
|
248 |
plt.ylabel("Predicted Price ($)")
|
249 |
plt.grid()
|
|
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
|
13 |
import torch
|
|
|
|
|
|
|
14 |
from datetime import datetime
|
15 |
|
16 |
# Dataset loading function with caching
|
|
|
19 |
try:
|
20 |
with st.spinner('Loading dataset...'):
|
21 |
original_data = pd.read_csv('CTP_Model1.csv', low_memory=False)
|
22 |
+
original_data.columns = original_data.columns.str.strip().str.lower()
|
23 |
return original_data
|
24 |
except Exception as e:
|
25 |
st.error(f"Error loading dataset: {str(e)}")
|
|
|
59 |
|
60 |
def find_closest_match(df, brand, model):
|
61 |
# Combine brand and model names from the dataset
|
62 |
+
df['full_name'] = df['make'] + ' ' + df['model']
|
63 |
|
64 |
# Create a list of all car names
|
65 |
car_names = df['full_name'].tolist()
|
|
|
82 |
return df.iloc[most_similar_index]
|
83 |
|
84 |
def get_car_overview(car_data):
|
85 |
+
prompt = f"Provide an overview of the following car:\nYear: {car_data['year']}\nMake: {car_data['make']}\nModel: {car_data['model']}\nTrim: {car_data['trim']}\nPrice: ${car_data['price']}\nCondition: {car_data['condition']}\n"
|
86 |
response = openai.ChatCompletion.create(
|
87 |
model="gpt-3.5-turbo",
|
88 |
messages=[{"role": "user", "content": prompt}]
|
|
|
120 |
current_year = datetime.now().year
|
121 |
|
122 |
for feature, value in user_input.items():
|
123 |
+
feature_lower = feature.lower()
|
124 |
+
if feature_lower in encoders:
|
125 |
+
encoded_features[feature_lower] = encoders[feature_lower].transform([value])[0]
|
126 |
+
elif feature_lower in categorical_features:
|
127 |
# If it's a categorical feature but not in encoders, set to 0 (unknown)
|
128 |
+
encoded_features[feature_lower] = 0
|
129 |
else:
|
130 |
# For numerical features, use the value as is
|
131 |
+
encoded_features[feature_lower] = value
|
132 |
|
133 |
# Calculate additional features
|
134 |
+
encoded_features['age'] = calculate_age(encoded_features['year'])
|
135 |
+
encoded_features['age_squared'] = encoded_features['age'] ** 2
|
136 |
|
137 |
# Assume average mileage per year (you may want to adjust this)
|
138 |
avg_mileage_per_year = 12000
|
139 |
+
encoded_features['mileage_per_year'] = avg_mileage_per_year
|
140 |
|
141 |
# Assume odometer reading (you may want to adjust this)
|
142 |
+
encoded_features['odometer'] = encoded_features['age'] * avg_mileage_per_year
|
143 |
|
144 |
input_data = pd.DataFrame([encoded_features])
|
145 |
|
146 |
# Ensure all expected columns are present
|
147 |
+
expected_columns = ['make', 'model', 'year', 'condition', 'fuel', 'odometer', 'title_status', 'transmission', 'drive', 'size', 'type', 'paint_color', 'age', 'age_squared', 'mileage_per_year']
|
148 |
for col in expected_columns:
|
149 |
if col not in input_data.columns:
|
150 |
input_data[col] = 0 # or some default value
|
|
|
202 |
match = find_closest_match(df, brand, model_name)
|
203 |
if match is not None:
|
204 |
st.write("Closest Match Found:")
|
205 |
+
st.write(f"Make: {match['make']}")
|
206 |
+
st.write(f"Model: {match['model']}")
|
207 |
+
st.write(f"Year: {match['year']}")
|
208 |
+
st.write(f"Price: ${match['price']}")
|
209 |
|
210 |
# Get additional information using GPT-3.5-turbo
|
211 |
overview = get_car_overview(match)
|
|
|
222 |
|
223 |
for year in years:
|
224 |
user_input = {
|
225 |
+
'make': match['make'].lower(),
|
226 |
+
'model': match['model'].lower(),
|
227 |
'year': year,
|
228 |
+
'condition': match.get('condition', 'unknown').lower(),
|
229 |
+
'fuel': match.get('fuel', 'unknown').lower(),
|
230 |
+
'title_status': match.get('title_status', 'unknown').lower(),
|
231 |
+
'transmission': match.get('transmission', 'unknown').lower(),
|
232 |
+
'drive': match.get('drive', 'unknown').lower(),
|
233 |
+
'size': match.get('size', 'unknown').lower(),
|
234 |
+
'type': match.get('type', 'unknown').lower(),
|
235 |
+
'paint_color': match.get('paint_color', 'unknown').lower(),
|
236 |
}
|
237 |
|
238 |
price = predict_price(model, label_encoders, categorical_features, user_input)
|
|
|
241 |
# Plotting the results
|
242 |
plt.figure(figsize=(10, 5))
|
243 |
plt.plot(years, predicted_prices, marker='o')
|
244 |
+
plt.title(f"Predicted Price of {match['make']} {match['model']} Over Time")
|
245 |
plt.xlabel("Year")
|
246 |
plt.ylabel("Predicted Price ($)")
|
247 |
plt.grid()
|