IAMTFRMZA committed on
Commit
47c5e0c
·
verified ·
1 Parent(s): 3b37a19

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +295 -0
app.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import json
4
+
5
# Scrape car listings from carfind.co.za, follow each listing's link for
# extra details, and save everything to /content/car_data.json.
base_url = "https://www.carfind.co.za/cars-for-sale"
data = []

# Upper bound, in seconds, on any single HTTP request so that one stalled
# connection cannot hang the whole scrape.
REQUEST_TIMEOUT = 30


def _store_alt_value(car_data, key, container, label):
    """Store the last word of an image's alt text under *key*.

    Looks at the first <img> carrying an alt attribute inside *container*.
    When that alt text contains *label*, its final space-separated word is
    written to ``car_data[key]``; otherwise *car_data* is left untouched.
    """
    img = container.find("img", alt=True)
    if img and label in img["alt"]:
        # NOTE(review): only the final word is kept, so a multi-word value
        # would be truncated - confirm against the site's alt text format.
        car_data[key] = img["alt"].split(" ")[-1]


# Iterate through the first 2 pages (the end of range() is exclusive)
for page_num in range(1, 3):
    url = f"{base_url}/page{page_num}"
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page should not discard the pages already scraped.
        print(f"Skipping {url}: {exc}")
        continue
    soup = BeautifulSoup(response.content, "html.parser")
    car_listings = soup.find_all("div", class_="center", style="width:900px")

    for listing in car_listings:
        car_data = {}

        title_div = listing.find("div", style="margin-top:8px;font-weight:bold;color:#4A75BC;font-size:24px;")
        if title_div:
            car_data["title"] = title_div.text.strip()

        price_div = listing.find("div", class_="h1")
        if price_div:
            car_data["price"] = price_div.text.strip()

        installment_div = listing.find("div", style="font-weight:bolder;color:#4A75BC;white-space:nowrap;margin-top:8px;display: flex;align-items: flex-end;")
        if installment_div:
            car_data["installment"] = installment_div.text.strip()

        status_div = listing.find("div", style="margin-top:20px;font-weight:bolder;color:#4A75BC;font-size:20px")
        if status_div:
            car_data["status"] = status_div.text.strip()

        # Year, mileage and transmission are looked up as consecutive
        # sibling <div>s, each identified by the alt text of its icon.
        year_div = listing.find("div", style="margin-top:20px;align-items:center", class_="search_row2")
        if year_div:
            _store_alt_value(car_data, "year", year_div, "Year")
            mileage_div = year_div.find_next_sibling("div")
            if mileage_div:
                _store_alt_value(car_data, "mileage", mileage_div, "Mileage")
                transmission_div = mileage_div.find_next_sibling("div")
                if transmission_div:
                    _store_alt_value(car_data, "transmission", transmission_div, "Transmission")

        dealer_div = listing.find("div", style="margin-top:20px;color:#4A75BC;font-size:20px;")
        if dealer_div:
            car_data["dealer"] = dealer_div.text.strip()

        location_div = listing.find("div", style="display:flex;align-items:center;margin-top:20px;color:#4A75BC")
        if location_div:
            _store_alt_value(car_data, "location", location_div, "Location")

        # Extract the image URL
        image_div = listing.find("div", style="min-height:242px;position:relative")
        if image_div:
            img_tag = image_div.find("img", style="width:485px;max-height:365px")
            # .get() avoids a KeyError when the <img> has no src attribute.
            if img_tag and img_tag.get("src"):
                car_data["image_url"] = img_tag["src"]

        link_tag = listing.find("a", href=True)
        if link_tag:
            car_link = "https://www.carfind.co.za" + link_tag["href"]
            car_data["link"] = car_link

            # Now follow the link to get more details
            try:
                details_response = requests.get(car_link, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # Keep the summary data even when the details page fails.
                print(f"Could not fetch details from {car_link}: {exc}")
            else:
                details_soup = BeautifulSoup(details_response.content, "html.parser")

                # Extract additional details from the details page
                main_info_div = details_soup.find("div", id="maininfo")
                if main_info_div:
                    description_div = main_info_div.find("div", style="margin-top:10px;font-size:14px;color:#5C5C5C;font-weight:bold")
                    if description_div:
                        car_data["description"] = description_div.text.strip()

                # Extract other specific details like Body Type, Colour, Engine Size, etc.
                # Each overview row contributes one key taken from its header text.
                overview_div = details_soup.find("div", id="overview_div")
                if overview_div:
                    details_rows = overview_div.find_all("div", class_="vdpoverviewrow")
                    for row in details_rows:
                        header = row.find("div", class_="financeheader")
                        value = row.find("div", class_="bold")
                        if header and value:
                            car_data[header.text.strip()] = value.text.strip()

        data.append(car_data)

# Save the data to a JSON file
with open('/content/car_data.json', 'w') as json_file:
    json.dump(data, json_file, indent=4, ensure_ascii=False)
91
+
92
+ import json
93
+
94
+ # Define the mapping of old keys to new keys
95
# Scraped labels that contain a space, each mapped to the space-free key it
# is renamed to in the formatted JSON.
key_mapping = {
    label: label.replace(" ", "")
    for label in (
        "Body Type",
        "Driving Wheels",
        "Engine Size",
        "Fuel Type",
        "Gearbox Type",
    )
}
102
+
103
+ # Function to rename keys recursively in a JSON object
104
def rename_keys(obj, mapping):
    """Return a copy of *obj* with dictionary keys renamed via *mapping*.

    Dictionaries and lists are walked recursively. A key found in *mapping*
    is replaced by its mapped name, any other key is kept as is, and values
    that are neither dicts nor lists are returned unchanged.
    """
    if isinstance(obj, list):
        return [rename_keys(element, mapping) for element in obj]
    if not isinstance(obj, dict):
        return obj
    return {
        mapping.get(name, name): rename_keys(child, mapping)
        for name, child in obj.items()
    }
115
+
116
+ # Path to the input JSON file
117
# Where the scraped listings are read from, and where the copy with renamed
# keys is written to
input_json_path = '/content/car_data.json'
output_json_path = '/content/car_dataformatted.json'

# Parse the scraped listings
with open(input_json_path, 'r') as file:
    data = json.loads(file.read())

# Apply the key renames to the whole document
modified_data = rename_keys(data, key_mapping)

# Serialize the renamed document to the output file
with open(output_json_path, 'w') as file:
    file.write(json.dumps(modified_data, indent=4))

print(f"Modified JSON saved to {output_json_path}")
134
+ import json
135
+ import concurrent.futures
136
+ from gradio_client import Client
137
+ import httpx
138
+
139
+ # Function to fetch AI response with timeout handling
140
def fetch_ai_response(client, title):
    """Ask the "/chat" endpoint for metric-system details about *title*.

    Returns whatever ``client.predict`` returns. Failures are reported as
    strings rather than raised: "Timeout occurred" for an httpx timeout and
    "Error: <exception>" for any other exception.
    """
    try:
        prompt = f"Provide me with details of a {title} in the metric system"
        answer = client.predict(message=prompt, api_name="/chat")
    except httpx.TimeoutException:
        return "Timeout occurred"
    except Exception as error:
        return f"Error: {error}"
    else:
        return answer
151
+
152
+ # Initialize Gradio Client
153
# Ask the chat model about every listing and store the answer on the listing
# under "AskAI", then write the enriched listings to a new JSON file.

# Initialize Gradio Client
client = Client("IAMTFRMZA/Groq-llama-3-chatbot_70b")

# Load existing JSON data
with open("/content/car_dataformatted.json", "r") as json_file:
    car_listings = json.load(json_file)

# Use concurrent processing to fetch AI responses
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Remember which listing each future belongs to: as_completed() yields
    # futures in completion order, not in the order they were submitted, so
    # a running counter would attach answers to the wrong listings.
    future_to_index = {}
    for idx, listing in enumerate(car_listings):
        title = listing.get("title")
        if not title:
            # The scraper only stores a title when it found one; without a
            # title there is nothing to ask about, so leave the listing as is.
            continue
        future = executor.submit(fetch_ai_response, client, title)
        future_to_index[future] = idx

    # Retrieve results from futures
    for future in concurrent.futures.as_completed(future_to_index):
        idx = future_to_index[future]
        try:
            car_listings[idx]["AskAI"] = future.result()
        except Exception as exc:
            print(f"Exception occurred: {exc}")

# Save updated listings back to JSON file
with open("/content/car_dataai.json", "w") as json_file:
    json.dump(car_listings, json_file, indent=4, ensure_ascii=False)

print("JSON file updated successfully with AI responses.")
179
+ import gradio as gr
180
+ import json
181
+ from sentence_transformers import SentenceTransformer, util
182
+
183
+ # Load the car data
184
# Read the AI-enriched listings written by the previous step
with open('/content/car_dataai.json', 'r') as f:
    car_data = json.loads(f.read())
186
+
187
+ # Function to normalize key names
188
def normalize_key(key):
    """Return *key* trimmed, lower-cased, with each space turned into "_"."""
    trimmed = key.strip()
    return "_".join(trimmed.lower().split(" "))
190
+
191
+ # Prepare car listings with additional fields
192
# Fields joined, in this order, into the text that is embedded for each car.
# A field missing from a listing contributes the text "None".
_DESCRIPTION_FIELDS = (
    'title', 'price', 'status', 'link', 'year', 'location', 'image_url',
    'mileage', 'BodyType', 'colour', 'DrivingWheels', 'EngineSize',
    'FuelType', 'GearboxType', 'power', 'seats',
)

# One " - "-separated description string per listing
car_listings = [
    " - ".join(str(car.get(field)) for field in _DESCRIPTION_FIELDS).strip()
    for car in car_data
]

# Pre-trained Sentence Transformer used for both listings and queries
model_name = 'paraphrase-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

# Embed every listing description once, up front
car_embeddings = model.encode(car_listings, convert_to_tensor=True)
203
+
204
def search_cars(query):
    """Return HTML cards for the listings that best match *query*.

    Each listing is scored by the cosine similarity between the query
    embedding and the listing embedding, plus a 0.8 boost when the query
    (spaces removed, lower-cased) occurs inside the listing's title, price,
    status, year or location. Listings scoring at least 0.5 are shown first;
    those scoring from 0.1 up to 0.5 follow in a second section. Lower
    scores are omitted.

    A missing or blank query returns an empty string. Without that guard the
    empty string would be a substring of every field, so every listing would
    receive the boost.
    """
    if not query or not query.strip():
        return ""

    # Vectorize the query and compare it with every listing embedding
    query_embedding = model.encode(query, convert_to_tensor=True)
    cos_scores = util.pytorch_cos_sim(query_embedding, car_embeddings)[0]

    # Normalize query to lower case and remove spaces for comparison
    query_normalized = query.replace(" ", "").lower()
    boost_fields = ('title', 'price', 'status', 'year', 'location')

    boosted_scores = []
    for score, car in zip(cos_scores, car_data):
        # str(... or "") tolerates missing, null and non-string values
        field_values = [
            str(car.get(field) or "").replace(" ", "").lower()
            for field in boost_fields
        ]
        boost = 0.8 if any(query_normalized in value for value in field_values) else 0
        boosted_scores.append(score.item() + boost)

    # Combine the scores with their respective car descriptions and sort
    results = sorted(
        zip(boosted_scores, car_listings, car_data),
        key=lambda item: item[0],
        reverse=True,
    )

    # Split the results into two sections based on the score thresholds
    high_score_results = [r for r in results if r[0] >= 0.5]
    low_score_results = [r for r in results if 0.1 <= r[0] < 0.5]

    sections = []
    if high_score_results:
        sections.append(
            _results_section_html("Here are your results", high_score_results)
        )
    if low_score_results:
        sections.append(
            _results_section_html(
                "Some more results you might be interested in",
                low_score_results,
                link_style="text-decoration:none; color:#333; background-color:#f0f0f0; padding:8px 12px; border-radius:4px; margin-right: 10px;",
            )
        )
    return "".join(sections)


def _results_section_html(heading, results, link_style=""):
    """Return one headed, flex-wrapped section containing a card per result."""
    cards = "".join(
        _car_card_html(score, car_info, link_style)
        for score, _description, car_info in results
    )
    return (
        f"<h2>{heading}</h2>"
        "<div style='display:flex; flex-wrap:wrap;'>"
        f"{cards}"
        "</div>"
    )


def _car_card_html(score, car_info, link_style=""):
    """Return the HTML card for a single listing.

    Every value taken from *car_info* is HTML-escaped before interpolation
    because the text originates from a scraped third-party page. *link_style*
    is optional inline CSS for the "View Listing" anchor.
    """
    from html import escape

    def shown(*keys):
        # The first key holding a value wins; a value that is absent under
        # every key is rendered as the text "None".
        for key in keys:
            value = car_info.get(key)
            if value is not None:
                return escape(str(value))
        return "None"

    parts = [
        "<div style='border: 1px solid #ddd; border-radius: 5px; padding: 10px; margin: 10px; width: 300px;'>"
    ]

    image_url = car_info.get('image_url')
    if image_url:
        # Omit the <img> entirely rather than pointing it at the text "None"
        parts.append(
            f"<img src='{escape(str(image_url))}' width='100' style='float:left; margin-right: 10px;'>"
        )

    parts.append(f"<h3>{shown('title')}</h3>")
    parts.append(f"<p><b>Price:</b> {shown('price')}</p>")
    parts.append(f"<p><b>Status:</b> {shown('status')}</p>")
    parts.append(f"<p><b>Body Type:</b> {shown('BodyType')}</p>")
    parts.append(f"<p><b>Year:</b> {shown('year')}</p>")
    # The listing scraper stores this value under lower-case 'mileage';
    # 'Mileage' is still tried first in case a details row supplied it.
    parts.append(f"<p><b>Mileage:</b> {shown('Mileage', 'mileage')}</p>")
    parts.append(f"<p><b>Location:</b> {shown('location')}</p>")

    link = car_info.get('link')
    if link:
        style_attr = f" style='{link_style}'" if link_style else ""
        parts.append(
            f"<a href='{escape(str(link))}' target='_blank'{style_attr}>View Listing</a>"
        )

    parts.append(f"<p><b>Similarity Score:</b> {score:.4f}</p>")
    parts.append("</div>")
    return "".join(parts)
277
+
278
+ # Create a chat interface using Gradio's Blocks
279
def create_chat_interface():
    """Build and return the Gradio Blocks search page.

    The page has a heading, a query textbox and an HTML results area. Every
    change to the textbox calls ``search_cars`` and shows its returned HTML.
    """
    demo = gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True)
    with demo:
        with gr.Row():
            gr.Markdown("## Ai Search")
        with gr.Row():
            search_box = gr.Textbox(
                label="What are you looking for?",
                placeholder="Please type your query here",
            )
        with gr.Row():
            results_panel = gr.HTML(label="Search Results")

        # Re-run the search whenever the textbox content changes
        search_box.change(fn=search_cars, inputs=search_box, outputs=results_panel)

    return demo


# Build the interface and start serving it
demo = create_chat_interface()
demo.launch()
295
+