Spaces:

IAMTFRMZA
/

AISearch

Sleeping

App Files Files Community

IAMTFRMZA committed on Jul 17, 2024

Commit

47c5e0c

verified ·

1 Parent(s): 3b37a19

Create app.py

Browse files

Files changed (1) hide show

app.py +295 -0

app.py ADDED Viewed

	@@ -0,0 +1,295 @@

+import requests
+from bs4 import BeautifulSoup
+import json
+base_url = "https://www.carfind.co.za/cars-for-sale"
+data = []
+# Upper bound, in seconds, on each HTTP request so that a stalled connection
+# cannot hang the scrape indefinitely.
+request_timeout = 30
+
+
+def _alt_last_word(container, label):
+    """Return the last space-separated word of an image's alt text.
+
+    Looks up the first <img> carrying an alt attribute inside *container* and,
+    if that alt text contains *label*, returns its final word. Returns None
+    when *container* is None, holds no such image, or the label is absent.
+    """
+    if container is None:
+        return None
+    img = container.find("img", alt=True)
+    if img and label in img["alt"]:
+        return img["alt"].split(" ")[-1]
+    return None
+
+
+# range(1, 3) visits result pages 1 and 2 only; widen the range to scrape more.
+for page_num in range(1, 3):
+    url = f"{base_url}/page{page_num}"
+    try:
+        response = requests.get(url, timeout=request_timeout)
+    except requests.RequestException as exc:
+        # Best effort: one unreachable page should not abort the whole run.
+        print(f"Skipping {url}: {exc}")
+        continue
+    soup = BeautifulSoup(response.content, "html.parser")
+    car_listings = soup.find_all("div", class_="center", style="width:900px")
+    for listing in car_listings:
+        # Only fields actually found on the page are stored, so any key may be absent.
+        car_data = {}
+        # The listing markup is matched by the exact inline style of each <div>.
+        title_div = listing.find("div", style="margin-top:8px;font-weight:bold;color:#4A75BC;font-size:24px;")
+        if title_div:
+            car_data["title"] = title_div.text.strip()
+        price_div = listing.find("div", class_="h1")
+        if price_div:
+            car_data["price"] = price_div.text.strip()
+        installment_div = listing.find("div", style="font-weight:bolder;color:#4A75BC;white-space:nowrap;margin-top:8px;display: flex;align-items: flex-end;")
+        if installment_div:
+            car_data["installment"] = installment_div.text.strip()
+        status_div = listing.find("div", style="margin-top:20px;font-weight:bolder;color:#4A75BC;font-size:20px")
+        if status_div:
+            car_data["status"] = status_div.text.strip()
+        # Year, mileage and transmission are read from three consecutive sibling
+        # <div>s. Each lookup tolerates a missing predecessor instead of calling
+        # find_next_sibling on None.
+        year_div = listing.find("div", style="margin-top:20px;align-items:center", class_="search_row2")
+        year = _alt_last_word(year_div, "Year")
+        if year is not None:
+            car_data["year"] = year
+        mileage_div = year_div.find_next_sibling("div") if year_div is not None else None
+        mileage = _alt_last_word(mileage_div, "Mileage")
+        if mileage is not None:
+            car_data["mileage"] = mileage
+        transmission_div = mileage_div.find_next_sibling("div") if mileage_div is not None else None
+        transmission = _alt_last_word(transmission_div, "Transmission")
+        if transmission is not None:
+            car_data["transmission"] = transmission
+        dealer_div = listing.find("div", style="margin-top:20px;color:#4A75BC;font-size:20px;")
+        if dealer_div:
+            car_data["dealer"] = dealer_div.text.strip()
+        location_div = listing.find("div", style="display:flex;align-items:center;margin-top:20px;color:#4A75BC")
+        # NOTE(review): only the last word of the alt text is kept, so a
+        # multi-word location would be truncated - confirm against the site.
+        location = _alt_last_word(location_div, "Location")
+        if location is not None:
+            car_data["location"] = location
+        # Extract the image URL
+        image_div = listing.find("div", style="min-height:242px;position:relative")
+        if image_div:
+            img_tag = image_div.find("img", style="width:485px;max-height:365px")
+            # .get() avoids a KeyError for an <img> without a src attribute.
+            if img_tag and img_tag.get("src"):
+                car_data["image_url"] = img_tag["src"]
+        link_tag = listing.find("a", href=True)
+        if link_tag:
+            car_link = "https://www.carfind.co.za" + link_tag["href"]
+            car_data["link"] = car_link
+            # Now follow the link to get more details
+            try:
+                details_response = requests.get(car_link, timeout=request_timeout)
+            except requests.RequestException as exc:
+                # Keep the fields gathered from the results page.
+                print(f"Could not load details for {car_link}: {exc}")
+                details_soup = None
+            else:
+                details_soup = BeautifulSoup(details_response.content, "html.parser")
+            # Extract additional details from the details page
+            main_info_div = details_soup.find("div", id="maininfo") if details_soup is not None else None
+            if main_info_div:
+                description_div = main_info_div.find("div", style="margin-top:10px;font-size:14px;color:#5C5C5C;font-weight:bold")
+                if description_div:
+                    car_data["description"] = description_div.text.strip()
+                # Overview rows become extra keys named after each row's header text.
+                overview_div = details_soup.find("div", id="overview_div")
+                if overview_div:
+                    details_rows = overview_div.find_all("div", class_="vdpoverviewrow")
+                    for row in details_rows:
+                        header = row.find("div", class_="financeheader")
+                        value = row.find("div", class_="bold")
+                        if header and value:
+                            car_data[header.text.strip()] = value.text.strip()
+        data.append(car_data)
+# Save the data to a JSON file
+with open('/content/car_data.json', 'w') as json_file:
+    json.dump(data, json_file, indent=4, ensure_ascii=False)
+import json
+# Old key -> new key: each spaced label maps to the same label with the
+# spaces removed (e.g. "Body Type" -> "BodyType").
+key_mapping = {
+    label: label.replace(" ", "")
+    for label in (
+        "Body Type",
+        "Driving Wheels",
+        "Engine Size",
+        "Fuel Type",
+        "Gearbox Type",
+    )
+}
+def rename_keys(obj, mapping):
+    """Return a copy of *obj* with dict keys renamed at every nesting level.
+
+    Dicts are rebuilt with each key replaced by ``mapping[key]`` when present
+    (otherwise kept), lists are rebuilt element by element, and any other
+    value is returned unchanged.
+    """
+    if isinstance(obj, list):
+        return [rename_keys(element, mapping) for element in obj]
+    if not isinstance(obj, dict):
+        # Leaf value: nothing to rename.
+        return obj
+    return {
+        mapping.get(old_key, old_key): rename_keys(child, mapping)
+        for old_key, child in obj.items()
+    }
+# Where the listings are read from, and where the renamed copy is written
+input_json_path = '/content/car_data.json'
+output_json_path = '/content/car_dataformatted.json'
+# Load the listings from the input file
+with open(input_json_path, 'r') as source:
+    data = json.load(source)
+# Apply the key mapping throughout the loaded structure
+modified_data = rename_keys(data, key_mapping)
+# Write the renamed structure out with 4-space indentation
+with open(output_json_path, 'w') as target:
+    json.dump(modified_data, target, indent=4)
+print(f"Modified JSON saved to {output_json_path}")
+import json
+import concurrent.futures
+from gradio_client import Client
+import httpx
+def fetch_ai_response(client, title):
+    """Ask the chat endpoint for details about *title* and return its reply.
+
+    Calls ``client.predict`` on the "/chat" API. Never raises: an
+    ``httpx.TimeoutException`` yields the string "Timeout occurred" and any
+    other exception yields "Error: <exception text>".
+    """
+    try:
+        prompt = f"Provide me with details of a {title} in the metric system"
+        reply = client.predict(message=prompt, api_name="/chat")
+    except httpx.TimeoutException:
+        return "Timeout occurred"
+    except Exception as err:
+        return f"Error: {err}"
+    return reply
+# Initialize Gradio Client
+client = Client("IAMTFRMZA/Groq-llama-3-chatbot_70b")
+# Load existing JSON data
+with open("/content/car_dataformatted.json", "r") as json_file:
+    car_listings = json.load(json_file)
+# Use concurrent processing to fetch AI responses
+with concurrent.futures.ThreadPoolExecutor() as executor:
+    # as_completed() yields futures in the order they finish, not the order
+    # they were submitted, so each future is mapped back to the index of the
+    # listing it was created for.
+    future_to_index = {}
+    for idx, listing in enumerate(car_listings):
+        title = listing.get("title")
+        if not title:
+            # Listings without a title cannot be asked about; leave them without "AskAI".
+            continue
+        future_to_index[executor.submit(fetch_ai_response, client, title)] = idx
+    # Store every reply on the listing that produced the request
+    for future in concurrent.futures.as_completed(future_to_index):
+        idx = future_to_index[future]
+        try:
+            car_listings[idx]["AskAI"] = future.result()
+        except Exception as exc:
+            print(f"Exception occurred: {exc}")
+# Save updated listings back to JSON file
+with open("/content/car_dataai.json", "w") as json_file:
+    json.dump(car_listings, json_file, indent=4, ensure_ascii=False)
+print("JSON file updated successfully with AI responses.")
+import gradio as gr
+import json
+from sentence_transformers import SentenceTransformer, util
+# Read the listings from the JSON file into memory
+with open('/content/car_dataai.json', 'r') as listings_file:
+    car_data = json.loads(listings_file.read())
+def normalize_key(key):
+    """Return *key* trimmed, lower-cased, with each space turned into "_"."""
+    lowered = key.strip().lower()
+    return "_".join(lowered.split(" "))
+# Fields joined, in this order and separated by " - ", into the text that
+# represents each car; a missing field contributes the text "None".
+_description_fields = (
+    'title', 'price', 'status', 'link', 'year', 'location', 'image_url',
+    'mileage', 'BodyType', 'colour', 'DrivingWheels', 'EngineSize',
+    'FuelType', 'GearboxType', 'power', 'seats',
+)
+car_listings = [
+    " - ".join(str(car.get(field)) for field in _description_fields).strip()
+    for car in car_data
+]
+# Pre-trained Sentence Transformer used for both listings and queries
+model_name = 'paraphrase-MiniLM-L6-v2'
+model = SentenceTransformer(model_name)
+# Encode every listing description once, up front
+car_embeddings = model.encode(car_listings, convert_to_tensor=True)
+def _render_card(car_info, score, link_style=""):
+    """Return the HTML for one result card.
+
+    Every value taken from *car_info* is HTML-escaped before it is placed in
+    the markup, since the listing data is scraped from an external site.
+    Missing or empty values are shown as "N/A"; the image and the link are
+    left out entirely when the listing has no URL for them.
+
+    Args:
+        car_info: Dict describing one listing.
+        score: Boosted similarity score, printed with four decimals.
+        link_style: Optional inline CSS for the "View Listing" anchor.
+    """
+    from html import escape
+
+    def shown(*keys):
+        # First non-empty value among *keys*, escaped for use in text or in a
+        # single-quoted attribute.
+        for key in keys:
+            value = car_info.get(key)
+            if value not in (None, ""):
+                return escape(str(value), quote=True)
+        return "N/A"
+
+    parts = ["<div style='border: 1px solid #ddd; border-radius: 5px; padding: 10px; margin: 10px; width: 300px;'>"]
+    if car_info.get('image_url'):
+        parts.append(f"<img src='{shown('image_url')}' width='100' style='float:left; margin-right: 10px;'>")
+    parts.append(f"<h3>{shown('title')}</h3>")
+    parts.append(f"<p><b>Price:</b> {shown('price')}</p>")
+    parts.append(f"<p><b>Status:</b> {shown('status')}</p>")
+    parts.append(f"<p><b>Body Type:</b> {shown('BodyType')}</p>")
+    parts.append(f"<p><b>Year:</b> {shown('year')}</p>")
+    # The results-page scraper stores this under 'mileage'; 'Mileage' is tried
+    # first to keep showing it for data that uses the capitalised key.
+    parts.append(f"<p><b>Mileage:</b> {shown('Mileage', 'mileage')}</p>")
+    parts.append(f"<p><b>Location:</b> {shown('location')}</p>")
+    if car_info.get('link'):
+        style_attr = f" style='{link_style}'" if link_style else ""
+        parts.append(f"<a href='{shown('link')}' target='_blank'{style_attr}>View Listing</a>")
+    parts.append(f"<p><b>Similarity Score:</b> {score:.4f}</p>")
+    parts.append("</div>")
+    return "".join(parts)
+
+
+def _render_section(heading, rows, link_style=""):
+    """Return a heading followed by a flex container holding one card per (score, car) row."""
+    cards = "".join(_render_card(car_info, score, link_style) for score, car_info in rows)
+    return f"<h2>{heading}</h2><div style='display:flex; flex-wrap:wrap;'>{cards}</div>"
+
+
+def search_cars(query):
+    """Rank the loaded cars against *query* and return the results as HTML.
+
+    Each car's score is the cosine similarity between the query embedding and
+    the car's description embedding, plus 0.8 when the query (spaces removed,
+    lower-cased) occurs inside the car's title, price, status, year or
+    location. Cars scoring at least 0.5 are listed first; cars scoring from
+    0.1 up to 0.5 follow under a second heading; the rest are dropped.
+
+    Args:
+        query: Free-text search string typed by the user.
+
+    Returns:
+        HTML markup for the result cards, or an empty string when the query
+        is blank or no car reaches a score of 0.1.
+    """
+    # A blank query would be a substring of every field and boost every car,
+    # so it is answered with no results instead.
+    if not query or not query.strip():
+        return ""
+    # Vectorize the query
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    # Calculate cosine similarity between the query and each car listing
+    cos_scores = util.pytorch_cos_sim(query_embedding, car_embeddings)[0]
+    # Normalize query to lower case and remove spaces for comparison
+    query_normalized = query.replace(" ", "").lower()
+    boost_fields = ('title', 'price', 'status', 'year', 'location')
+    scored = []
+    for score, car in zip(cos_scores, car_data):
+        # str(... or '') keeps the comparison safe for missing or non-string values.
+        haystacks = (str(car.get(field) or '').replace(" ", "").lower() for field in boost_fields)
+        boost = 0.8 if any(query_normalized in text for text in haystacks) else 0
+        scored.append((score.item() + boost, car))
+    # Best match first
+    scored.sort(key=lambda pair: pair[0], reverse=True)
+    # Separate results into two groups based on the score thresholds
+    high_score_results = [pair for pair in scored if pair[0] >= 0.5]
+    low_score_results = [pair for pair in scored if 0.1 <= pair[0] < 0.5]
+    sections = []
+    if high_score_results:
+        sections.append(_render_section("Here are your results", high_score_results))
+    if low_score_results:
+        sections.append(_render_section(
+            "Some more results you might be interested in",
+            low_score_results,
+            link_style="text-decoration:none; color:#333; background-color:#f0f0f0; padding:8px 12px; border-radius:4px; margin-right: 10px;",
+        ))
+    return "".join(sections)
+def create_chat_interface():
+    """Build and return the Gradio Blocks app for the search page.
+
+    The layout is three rows: a Markdown heading, a query textbox and an HTML
+    output area. Every change to the textbox calls ``search_cars`` with its
+    content and puts the returned markup into the output area.
+    """
+    with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as app:
+        with gr.Row():
+            gr.Markdown("## Ai Search")
+        with gr.Row():
+            search_box = gr.Textbox(
+                label="What are you looking for?",
+                placeholder="Please type your query here",
+            )
+        with gr.Row():
+            results_panel = gr.HTML(label="Search Results")
+        # Re-run the search on each change of the textbox value
+        search_box.change(fn=search_cars, inputs=search_box, outputs=results_panel)
+    return app
+# Build the interface when this module is executed, keep it in the
+# module-level name `demo`, and launch it.
+demo = create_chat_interface()
+demo.launch()