IAMTFRMZA committed on
Commit
48caf49
·
verified ·
1 Parent(s): 47c5e0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -176
app.py CHANGED
@@ -1,183 +1,8 @@
1
  import requests
2
  from bs4 import BeautifulSoup
3
  import json
4
-
5
- base_url = "https://www.carfind.co.za/cars-for-sale"
6
- data = []
7
-
8
- # Iterate through the first 2 pages (range(1, 3) yields pages 1 and 2)
9
- for page_num in range(1, 3):
10
- url = f"{base_url}/page{page_num}"
11
- response = requests.get(url)
12
- soup = BeautifulSoup(response.content, "html.parser")
13
- car_listings = soup.find_all("div", class_="center", style="width:900px")
14
-
15
- for listing in car_listings:
16
- car_data = {}
17
- title_div = listing.find("div", style="margin-top:8px;font-weight:bold;color:#4A75BC;font-size:24px;")
18
- if title_div:
19
- car_data["title"] = title_div.text.strip()
20
- price_div = listing.find("div", class_="h1")
21
- if price_div:
22
- car_data["price"] = price_div.text.strip()
23
- installment_div = listing.find("div", style="font-weight:bolder;color:#4A75BC;white-space:nowrap;margin-top:8px;display: flex;align-items: flex-end;")
24
- if installment_div:
25
- car_data["installment"] = installment_div.text.strip()
26
- status_div = listing.find("div", style="margin-top:20px;font-weight:bolder;color:#4A75BC;font-size:20px")
27
- if status_div:
28
- car_data["status"] = status_div.text.strip()
29
- year_div = listing.find("div", style="margin-top:20px;align-items:center", class_="search_row2")
30
- if year_div:
31
- year_img = year_div.find("img", alt=True)
32
- if year_img and "Year" in year_img["alt"]:
33
- car_data["year"] = year_img["alt"].split(" ")[-1]
34
- mileage_div = year_div.find_next_sibling("div")
35
- if mileage_div:
36
- mileage_img = mileage_div.find("img", alt=True)
37
- if mileage_img and "Mileage" in mileage_img["alt"]:
38
- car_data["mileage"] = mileage_img["alt"].split(" ")[-1]
39
- transmission_div = mileage_div.find_next_sibling("div")
40
- if transmission_div:
41
- transmission_img = transmission_div.find("img", alt=True)
42
- if transmission_img and "Transmission" in transmission_img["alt"]:
43
- car_data["transmission"] = transmission_img["alt"].split(" ")[-1]
44
- dealer_div = listing.find("div", style="margin-top:20px;color:#4A75BC;font-size:20px;")
45
- if dealer_div:
46
- car_data["dealer"] = dealer_div.text.strip()
47
- location_div = listing.find("div", style="display:flex;align-items:center;margin-top:20px;color:#4A75BC")
48
- if location_div:
49
- location_img = location_div.find("img", alt=True)
50
- if location_img and "Location" in location_img["alt"]:
51
- car_data["location"] = location_img["alt"].split(" ")[-1]
52
-
53
- # Extract the image URL
54
- image_div = listing.find("div", style="min-height:242px;position:relative")
55
- if image_div:
56
- img_tag = image_div.find("img", style="width:485px;max-height:365px")
57
- if img_tag and img_tag["src"]:
58
- car_data["image_url"] = img_tag["src"]
59
-
60
- link_tag = listing.find("a", href=True)
61
- if link_tag:
62
- car_link = "https://www.carfind.co.za" + link_tag["href"]
63
- car_data["link"] = car_link
64
-
65
- # Now follow the link to get more details
66
- details_response = requests.get(car_link)
67
- details_soup = BeautifulSoup(details_response.content, "html.parser")
68
-
69
- # Extract additional details from the details page
70
- main_info_div = details_soup.find("div", id="maininfo")
71
- if main_info_div:
72
- description_div = main_info_div.find("div", style="margin-top:10px;font-size:14px;color:#5C5C5C;font-weight:bold")
73
- if description_div:
74
- car_data["description"] = description_div.text.strip()
75
-
76
- # Extract other specific details like Body Type, Colour, Engine Size, etc.
77
- overview_div = details_soup.find("div", id="overview_div")
78
- if overview_div:
79
- details_rows = overview_div.find_all("div", class_="vdpoverviewrow")
80
- for row in details_rows:
81
- header = row.find("div", class_="financeheader")
82
- value = row.find("div", class_="bold")
83
- if header and value:
84
- car_data[header.text.strip()] = value.text.strip()
85
-
86
- data.append(car_data)
87
-
88
- # Save the data to a JSON file
89
- with open('/content/car_data.json', 'w') as json_file:
90
- json.dump(data, json_file, indent=4, ensure_ascii=False)
91
-
92
- import json
93
-
94
- # Define the mapping of old keys to new keys
95
- key_mapping = {
96
- "Body Type": "BodyType",
97
- "Driving Wheels": "DrivingWheels",
98
- "Engine Size": "EngineSize",
99
- "Fuel Type": "FuelType",
100
- "Gearbox Type": "GearboxType"
101
- }
102
-
103
- # Function to rename keys recursively in a JSON object
104
- def rename_keys(obj, mapping):
105
- if isinstance(obj, dict):
106
- new_obj = {}
107
- for key, value in obj.items():
108
- new_key = mapping.get(key, key) # Use mapped key if found, else use original key
109
- new_obj[new_key] = rename_keys(value, mapping)
110
- return new_obj
111
- elif isinstance(obj, list):
112
- return [rename_keys(item, mapping) for item in obj]
113
- else:
114
- return obj
115
-
116
- # Path to the input JSON file
117
- input_json_path = '/content/car_data.json'
118
-
119
- # Path to save the modified JSON file
120
- output_json_path = '/content/car_dataformatted.json'
121
-
122
- # Read the JSON file
123
- with open(input_json_path, 'r') as file:
124
- data = json.load(file)
125
-
126
- # Rename keys
127
- modified_data = rename_keys(data, key_mapping)
128
-
129
- # Save the modified JSON to a new file
130
- with open(output_json_path, 'w') as file:
131
- json.dump(modified_data, file, indent=4)
132
-
133
- print(f"Modified JSON saved to {output_json_path}")
134
- import json
135
- import concurrent.futures
136
- from gradio_client import Client
137
- import httpx
138
-
139
- # Function to fetch AI response with timeout handling
140
- def fetch_ai_response(client, title):
141
- try:
142
- result = client.predict(
143
- message=f"Provide me with details of a {title} in the metric system",
144
- api_name="/chat"
145
- )
146
- return result
147
- except httpx.TimeoutException:
148
- return "Timeout occurred"
149
- except Exception as e:
150
- return f"Error: {e}"
151
-
152
- # Initialize Gradio Client
153
- client = Client("IAMTFRMZA/Groq-llama-3-chatbot_70b")
154
-
155
- # Load existing JSON data
156
- with open("/content/car_dataformatted.json", "r") as json_file:
157
- car_listings = json.load(json_file)
158
-
159
- # Use concurrent processing to fetch AI responses
160
- with concurrent.futures.ThreadPoolExecutor() as executor:
161
- futures = []
162
- for listing in car_listings:
163
- title = listing["title"]
164
- futures.append(executor.submit(fetch_ai_response, client, title))
165
-
166
- # Retrieve results from futures
167
- for idx, future in enumerate(concurrent.futures.as_completed(futures)):
168
- try:
169
- result = future.result()
170
- car_listings[idx]["AskAI"] = result
171
- except Exception as exc:
172
- print(f"Exception occurred: {exc}")
173
-
174
- # Save updated listings back to JSON file
175
- with open("/content/car_dataai.json", "w") as json_file:
176
- json.dump(car_listings, json_file, indent=4, ensure_ascii=False)
177
-
178
- print("JSON file updated successfully with AI responses.")
179
  import gradio as gr
180
- import json
181
  from sentence_transformers import SentenceTransformer, util
182
 
183
  # Load the car data
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
  import json
4
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import gradio as gr
 
6
  from sentence_transformers import SentenceTransformer, util
7
 
8
  # Load the car data