Update app.py
Browse files
app.py
CHANGED
@@ -1,183 +1,8 @@
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup
|
3 |
import json
|
4 |
-
|
5 |
-
base_url = "https://www.carfind.co.za/cars-for-sale"
data = []

# Seconds to wait for a single HTTP response. Without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _fetch_soup(url):
    """Download *url* and return the parsed page, or None if the request fails."""
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: one bad page must not abort the whole scrape.
        print(f"Skipping {url}: {exc}")
        return None
    return BeautifulSoup(response.content, "html.parser")


def _set_text(car, key, node):
    """Store the stripped text of *node* under *key* when the node was found."""
    if node:
        car[key] = node.text.strip()


def _set_alt_word(car, key, container, label):
    """Store the last word of an image's alt text under *key*.

    Uses the first <img alt=...> inside *container*, and only when its alt
    text contains *label*.
    """
    img = container.find("img", alt=True)
    if img and label in img["alt"]:
        car[key] = img["alt"].split(" ")[-1]


def _add_details(car, url):
    """Follow a listing's detail page and merge its fields into *car*."""
    details_soup = _fetch_soup(url)
    if details_soup is None:
        return

    main_info_div = details_soup.find("div", id="maininfo")
    if main_info_div:
        _set_text(
            car,
            "description",
            main_info_div.find(
                "div",
                style="margin-top:10px;font-size:14px;color:#5C5C5C;font-weight:bold",
            ),
        )

    # Overview rows (Body Type, Colour, Engine Size, ...) are stored under
    # whatever header text the page uses.
    overview_div = details_soup.find("div", id="overview_div")
    if overview_div:
        for row in overview_div.find_all("div", class_="vdpoverviewrow"):
            header = row.find("div", class_="financeheader")
            value = row.find("div", class_="bold")
            if header and value:
                car[header.text.strip()] = value.text.strip()


def _parse_listing(listing):
    """Build the dict for one search-result card; absent fields are omitted."""
    car = {}
    _set_text(
        car,
        "title",
        listing.find(
            "div",
            style="margin-top:8px;font-weight:bold;color:#4A75BC;font-size:24px;",
        ),
    )
    _set_text(car, "price", listing.find("div", class_="h1"))
    _set_text(
        car,
        "installment",
        listing.find(
            "div",
            style="font-weight:bolder;color:#4A75BC;white-space:nowrap;margin-top:8px;display: flex;align-items: flex-end;",
        ),
    )
    _set_text(
        car,
        "status",
        listing.find(
            "div",
            style="margin-top:20px;font-weight:bolder;color:#4A75BC;font-size:20px",
        ),
    )

    # Year, mileage and transmission are looked up as consecutive sibling
    # divs, each located relative to the previous one.
    # NOTE(review): nesting reconstructed from the data dependencies - confirm.
    year_div = listing.find(
        "div", style="margin-top:20px;align-items:center", class_="search_row2"
    )
    if year_div:
        _set_alt_word(car, "year", year_div, "Year")
        mileage_div = year_div.find_next_sibling("div")
        if mileage_div:
            _set_alt_word(car, "mileage", mileage_div, "Mileage")
            transmission_div = mileage_div.find_next_sibling("div")
            if transmission_div:
                _set_alt_word(car, "transmission", transmission_div, "Transmission")

    _set_text(
        car,
        "dealer",
        listing.find("div", style="margin-top:20px;color:#4A75BC;font-size:20px;"),
    )

    location_div = listing.find(
        "div", style="display:flex;align-items:center;margin-top:20px;color:#4A75BC"
    )
    if location_div:
        _set_alt_word(car, "location", location_div, "Location")

    # Extract the image URL
    image_div = listing.find("div", style="min-height:242px;position:relative")
    if image_div:
        img_tag = image_div.find("img", style="width:485px;max-height:365px")
        # .get() so an <img> without a src attribute is skipped, not a KeyError.
        src = img_tag.get("src") if img_tag else None
        if src:
            car["image_url"] = src

    link_tag = listing.find("a", href=True)
    if link_tag:
        car_link = "https://www.carfind.co.za" + link_tag["href"]
        car["link"] = car_link
        # Now follow the link to get more details
        _add_details(car, car_link)

    return car


# Iterate through result pages 1 and 2 (the range end is exclusive)
for page_num in range(1, 3):
    soup = _fetch_soup(f"{base_url}/page{page_num}")
    if soup is None:
        continue
    for listing in soup.find_all("div", class_="center", style="width:900px"):
        data.append(_parse_listing(listing))

# Save the data to a JSON file. ensure_ascii=False writes raw non-ASCII
# characters, so the encoding is pinned rather than left to the locale.
with open('/content/car_data.json', 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file, indent=4, ensure_ascii=False)
|
91 |
-
|
92 |
-
import json
|
93 |
-
|
94 |
-
# Define the mapping of old keys to new keys
|
95 |
-
# Maps the spaced header text scraped from the detail page to the
# space-free key used in the formatted output.
key_mapping = {
    "Body Type": "BodyType",
    "Driving Wheels": "DrivingWheels",
    "Engine Size": "EngineSize",
    "Fuel Type": "FuelType",
    "Gearbox Type": "GearboxType",
}
|
102 |
-
|
103 |
-
# Function to rename keys recursively in a JSON object
|
104 |
-
def rename_keys(obj, mapping):
    """Return a copy of *obj* with dict keys renamed according to *mapping*.

    Dicts and lists are walked recursively at any depth; every other value
    is returned as-is. Keys missing from *mapping* keep their original name.
    If two keys of the same dict end up with the same new name, the one
    iterated later overwrites the earlier one.

    Args:
        obj: A JSON-like value (dict, list, or scalar).
        mapping: Dict of old key -> new key.

    Returns:
        A new dict/list structure with the keys renamed; non-container
        values are the same objects as in the input.
    """
    if isinstance(obj, dict):
        # Use the mapped key if found, else the original key.
        return {
            mapping.get(key, key): rename_keys(value, mapping)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [rename_keys(item, mapping) for item in obj]
    return obj
|
115 |
-
|
116 |
-
# Path to the input JSON file
|
117 |
-
input_json_path = '/content/car_data.json'

# Path to save the modified JSON file
output_json_path = '/content/car_dataformatted.json'

# Read the JSON file. The encoding is explicit so the result does not
# depend on the platform's locale default.
with open(input_json_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Rename keys
modified_data = rename_keys(data, key_mapping)

# Save the modified JSON to a new file. json.dump escapes non-ASCII by
# default, so the output is plain ASCII whatever the listings contain.
with open(output_json_path, 'w', encoding='utf-8') as file:
    json.dump(modified_data, file, indent=4)

print(f"Modified JSON saved to {output_json_path}")
|
134 |
-
import json
|
135 |
-
import concurrent.futures
|
136 |
-
from gradio_client import Client
|
137 |
-
import httpx
|
138 |
-
|
139 |
-
# Function to fetch AI response with timeout handling
|
140 |
-
def fetch_ai_response(client, title):
    """Ask the chat endpoint for details about the car named *title*.

    Failures are returned as strings rather than raised, so one failed
    request never aborts a batch of lookups.

    Args:
        client: Object exposing ``predict(message=..., api_name=...)``;
            this file passes a ``gradio_client.Client``.
        title: Listing title inserted into the prompt.

    Returns:
        Whatever ``client.predict`` returns on success, the string
        ``"Timeout occurred"`` on an ``httpx.TimeoutException``, or
        ``"Error: <exception>"`` for any other exception.
    """
    try:
        return client.predict(
            message=f"Provide me with details of a {title} in the metric system",
            api_name="/chat",
        )
    except httpx.TimeoutException:
        return "Timeout occurred"
    except Exception as e:
        # Deliberate catch-all: the caller stores the text next to the listing.
        return f"Error: {e}"
|
151 |
-
|
152 |
-
# Initialize Gradio Client
|
153 |
-
client = Client("IAMTFRMZA/Groq-llama-3-chatbot_70b")

# Load existing JSON data
with open("/content/car_dataformatted.json", "r", encoding="utf-8") as json_file:
    car_listings = json.load(json_file)

# Use concurrent processing to fetch AI responses
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = []
    # as_completed() yields futures in completion order, not submission
    # order, so each future records the index of the listing it belongs to.
    listing_index = {}
    for idx, listing in enumerate(car_listings):
        title = listing.get("title")
        if not title:
            # Listings scraped without a title get no AI answer.
            continue
        future = executor.submit(fetch_ai_response, client, title)
        futures.append(future)
        listing_index[future] = idx

    # Retrieve results from futures
    for future in concurrent.futures.as_completed(futures):
        try:
            result = future.result()
            car_listings[listing_index[future]]["AskAI"] = result
        except Exception as exc:
            print(f"Exception occurred: {exc}")

# Save updated listings back to JSON file. ensure_ascii=False writes raw
# non-ASCII characters, so the encoding is pinned explicitly.
with open("/content/car_dataai.json", "w", encoding="utf-8") as json_file:
    json.dump(car_listings, json_file, indent=4, ensure_ascii=False)

print("JSON file updated successfully with AI responses.")
|
179 |
import gradio as gr
|
180 |
-
import json
|
181 |
from sentence_transformers import SentenceTransformer, util
|
182 |
|
183 |
# Load the car data
|
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup
|
3 |
import json
|
4 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
import gradio as gr
|
|
|
6 |
from sentence_transformers import SentenceTransformer, util
|
7 |
|
8 |
# Load the car data
|