EdBoy2202 committed on
Commit
75a11b1
·
verified ·
1 Parent(s): feb93b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -70
app.py CHANGED
@@ -21,7 +21,6 @@ def load_datasets():
21
  try:
22
  with st.spinner('Loading dataset...'):
23
  original_data = pd.read_csv('CTP_Model1.csv', low_memory=False)
24
- original_data.columns = original_data.columns.str.strip().str.lower()
25
  return original_data
26
  except Exception as e:
27
  st.error(f"Error loading dataset: {str(e)}")
@@ -61,7 +60,7 @@ def classify_image(image):
61
 
62
  def find_closest_match(df, brand, model):
63
  # Combine brand and model names from the dataset
64
- df['full_name'] = df['make'] + ' ' + df['model']
65
 
66
  # Create a list of all car names
67
  car_names = df['full_name'].tolist()
@@ -84,7 +83,7 @@ def find_closest_match(df, brand, model):
84
  return df.iloc[most_similar_index]
85
 
86
  def get_car_overview(car_data):
87
- prompt = f"Provide an overview of the following car:\nYear: {car_data['year']}\nMake: {car_data['make']}\nModel: {car_data['model']}\nTrim: {car_data['trim']}\nPrice: ${car_data['price']}\nCondition: {car_data['condition']}\n"
88
  response = openai.ChatCompletion.create(
89
  model="gpt-3.5-turbo",
90
  messages=[{"role": "user", "content": prompt}]
@@ -96,70 +95,53 @@ def load_model_and_encodings():
96
  with st.spinner('Loading model...'):
97
  model_content = hf_hub_download(repo_id="EdBoy2202/car_prediction_model", filename="car_price_modelv3.pkl")
98
  model = joblib.load(model_content)
99
-
100
- original_data = load_datasets()
101
-
102
- label_encoders = {}
103
- categorical_features = original_data.select_dtypes(include=['object']).columns.tolist()
104
-
105
- for feature in categorical_features:
106
- le = LabelEncoder()
107
- unique_values = original_data[feature].fillna('unknown').str.strip().unique()
108
- le.fit(unique_values)
109
- label_encoders[feature.lower()] = le
110
-
111
- return model, label_encoders, categorical_features
112
  except Exception as e:
113
  st.error(f"Error loading model: {str(e)}")
114
  raise e
115
 
116
- def calculate_age(year):
117
- current_year = datetime.now().year
118
- return current_year - year
119
-
120
- def predict_price(model, encoders, categorical_features, user_input):
121
- encoded_features = {}
122
- current_year = datetime.now().year
123
 
124
- for feature, value in user_input.items():
125
- feature_lower = feature.lower()
126
- if feature_lower in encoders:
127
- encoded_features[feature_lower] = encoders[feature_lower].transform([value])[0]
128
- elif feature_lower in categorical_features:
129
- # If it's a categorical feature but not in encoders, set to 0 (unknown)
130
- encoded_features[feature_lower] = 0
131
- else:
132
- # For numerical features, use the value as is
133
- encoded_features[feature_lower] = value
134
 
135
- # Calculate additional features
136
- encoded_features['age'] = calculate_age(encoded_features['year'])
137
- encoded_features['age_squared'] = encoded_features['age'] ** 2
 
138
 
139
- # Assume average mileage per year (you may want to adjust this)
140
- avg_mileage_per_year = 12000
141
- encoded_features['mileage_per_year'] = avg_mileage_per_year
142
 
143
- # Assume odometer reading (you may want to adjust this)
144
- encoded_features['odometer'] = encoded_features['age'] * avg_mileage_per_year
 
 
145
 
146
- input_data = pd.DataFrame([encoded_features])
 
147
 
148
- # Ensure all expected columns are present
149
- expected_columns = ['make', 'model', 'year', 'condition', 'fuel', 'odometer', 'title_status', 'transmission', 'drive', 'size', 'type', 'paint_color', 'age', 'age_squared', 'mileage_per_year']
150
- for col in expected_columns:
151
- if col not in input_data.columns:
152
- input_data[col] = 0 # or some default value
153
 
154
- predicted_price = model.predict(input_data)
 
155
  return predicted_price[0]
156
 
157
  # Streamlit App
158
  st.title("Auto Appraise")
159
  st.write("Upload a car image or take a picture to get its brand, model, overview, and expected price!")
160
 
161
- # Load model and encoders
162
- model, label_encoders, categorical_features = load_model_and_encodings()
163
 
164
  # Initialize OpenAI API key
165
  openai.api_key = st.secrets["GPT_TOKEN"]
@@ -204,10 +186,10 @@ if image is not None:
204
  match = find_closest_match(df, brand, model_name)
205
  if match is not None:
206
  st.write("Closest Match Found:")
207
- st.write(f"Make: {match['make']}")
208
- st.write(f"Model: {match['model']}")
209
- st.write(f"Year: {match['year']}")
210
- st.write(f"Price: ${match['price']}")
211
 
212
  # Get additional information using GPT-3.5-turbo
213
  overview = get_car_overview(match)
@@ -223,27 +205,13 @@ if image is not None:
223
  predicted_prices = []
224
 
225
  for year in years:
226
- user_input = {
227
- 'make': match['make'].lower(),
228
- 'model': match['model'].lower(),
229
- 'year': year,
230
- 'condition': match.get('condition', 'unknown').lower(),
231
- 'fuel': match.get('fuel', 'unknown').lower(),
232
- 'title_status': match.get('title_status', 'unknown').lower(),
233
- 'transmission': match.get('transmission', 'unknown').lower(),
234
- 'drive': match.get('drive', 'unknown').lower(),
235
- 'size': match.get('size', 'unknown').lower(),
236
- 'type': match.get('type', 'unknown').lower(),
237
- 'paint_color': match.get('paint_color', 'unknown').lower(),
238
- }
239
-
240
- price = predict_price(model, label_encoders, categorical_features, user_input)
241
  predicted_prices.append(price)
242
 
243
  # Plotting the results
244
  plt.figure(figsize=(10, 5))
245
  plt.plot(years, predicted_prices, marker='o')
246
- plt.title(f"Predicted Price of {match['make']} {match['model']} Over Time")
247
  plt.xlabel("Year")
248
  plt.ylabel("Predicted Price ($)")
249
  plt.grid()
 
21
  try:
22
  with st.spinner('Loading dataset...'):
23
  original_data = pd.read_csv('CTP_Model1.csv', low_memory=False)
 
24
  return original_data
25
  except Exception as e:
26
  st.error(f"Error loading dataset: {str(e)}")
 
60
 
61
  def find_closest_match(df, brand, model):
62
  # Combine brand and model names from the dataset
63
+ df['full_name'] = df['Make'] + ' ' + df['Model']
64
 
65
  # Create a list of all car names
66
  car_names = df['full_name'].tolist()
 
83
  return df.iloc[most_similar_index]
84
 
85
  def get_car_overview(car_data):
86
+ prompt = f"Provide an overview of the following car:\nYear: {car_data['Year']}\nMake: {car_data['Make']}\nModel: {car_data['Model']}\nTrim: {car_data['Trim']}\nPrice: ${car_data['Price']}\nCondition: {car_data['Condition']}\n"
87
  response = openai.ChatCompletion.create(
88
  model="gpt-3.5-turbo",
89
  messages=[{"role": "user", "content": prompt}]
 
95
  with st.spinner('Loading model...'):
96
  model_content = hf_hub_download(repo_id="EdBoy2202/car_prediction_model", filename="car_price_modelv3.pkl")
97
  model = joblib.load(model_content)
98
+ return model
 
 
 
 
 
 
 
 
 
 
 
 
99
  except Exception as e:
100
  st.error(f"Error loading model: {str(e)}")
101
  raise e
102
 
103
def predict_price(model, match, year, reference_df=None, avg_yearly_mileage=12000):
    """Predict the price of the matched car as if it were from ``year``.

    The closest-match record is copied, its ``Year`` is overwritten, and the
    derived ``Age`` / ``Age_squared`` features are recomputed relative to the
    current calendar year. Required features that are absent or NaN are filled
    with the most common value of that column in the reference dataset.

    Args:
        model: Fitted estimator exposing ``feature_names_in_`` and
            ``predict``.
        match: Mapping-like record of the closest dataset match (the app
            passes a row obtained via ``df.iloc[...]``). It is not mutated.
        year: Model year to predict for.
        reference_df: DataFrame used to look up fallback values. When
            ``None`` the module-level ``df`` is used, as in the original code.
        avg_yearly_mileage: Mileage assumed per year of age when the record
            has no odometer reading.

    Returns:
        The first element of ``model.predict`` for the prepared single row.

    Raises:
        KeyError: If a required column cannot be filled from the reference
            dataset, or the model expects a column that is not available.
    """
    # Work on a copy so the caller's match record stays untouched across the
    # per-year loop that calls this function repeatedly.
    input_data = match.copy()

    input_data['Year'] = year

    age = datetime.now().year - year
    input_data['Age'] = age
    input_data['Age_squared'] = age ** 2

    # If odometer is missing, estimate it from age. Years in the future give a
    # negative age; clamp so the estimated mileage is never negative.
    if 'Odometer' not in input_data or pd.isna(input_data['Odometer']):
        input_data['Odometer'] = max(age, 0) * avg_yearly_mileage

    required_columns = [
        'Make', 'Model', 'Year', 'Condition', 'Fuel', 'Odometer',
        'Title_status', 'Transmission', 'Drive', 'Size', 'Type',
        'Paint_color', 'Age', 'Age_squared',
    ]

    for col in required_columns:
        if col in input_data and not pd.isna(input_data[col]):
            continue
        # Resolve the fallback dataset lazily: the module-level `df` is only
        # needed when a value actually has to be filled in.
        source = df if reference_df is None else reference_df
        if col not in source.columns:
            raise KeyError(
                f"Required column {col!r} is missing from the match and "
                f"cannot be filled from the reference dataset"
            )
        input_data[col] = source[col].mode().iloc[0]

    input_df = pd.DataFrame([input_data])

    # Only pass the columns the model was fitted on, in the fitted order.
    model_columns = list(model.feature_names_in_)
    missing = [col for col in model_columns if col not in input_df.columns]
    if missing:
        raise KeyError(f"Model expects columns not present in input: {missing}")
    input_df = input_df[model_columns]

    predicted_price = model.predict(input_df)
    return predicted_price[0]
138
 
139
  # Streamlit App
140
  st.title("Auto Appraise")
141
  st.write("Upload a car image or take a picture to get its brand, model, overview, and expected price!")
142
 
143
+ # Load the trained price-prediction model (label encoders are no longer returned)
144
+ model = load_model_and_encodings()
145
 
146
  # Initialize OpenAI API key
147
  openai.api_key = st.secrets["GPT_TOKEN"]
 
186
  match = find_closest_match(df, brand, model_name)
187
  if match is not None:
188
  st.write("Closest Match Found:")
189
+ st.write(f"Make: {match['Make']}")
190
+ st.write(f"Model: {match['Model']}")
191
+ st.write(f"Year: {match['Year']}")
192
+ st.write(f"Price: ${match['Price']}")
193
 
194
  # Get additional information using GPT-3.5-turbo
195
  overview = get_car_overview(match)
 
205
  predicted_prices = []
206
 
207
  for year in years:
208
+ price = predict_price(model, match, year)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  predicted_prices.append(price)
210
 
211
  # Plotting the results
212
  plt.figure(figsize=(10, 5))
213
  plt.plot(years, predicted_prices, marker='o')
214
+ plt.title(f"Predicted Price of {match['Make']} {match['Model']} Over Time")
215
  plt.xlabel("Year")
216
  plt.ylabel("Predicted Price ($)")
217
  plt.grid()