# Last commit: "Update app.py" (297bea8, verified) by SilviuMatei; file size 3 kB.
from urllib.parse import urljoin

import pandas as pd
import requests
import yaml
from bs4 import BeautifulSoup
from smolagents import CodeAgent, DuckDuckGoSearchTool, GradioUI, HfApiModel, load_tool, tool

from tools.final_answer import FinalAnswerTool
@tool
def amazon_product_scraper(search_url: str) -> tuple[pd.DataFrame, str]:
    """
    Scrapes Amazon search results for product titles, prices, delivery info, and links.

    Products without a parseable price or without a link are dropped; the remaining
    ones are sorted by ascending price and the cheapest one is recommended.

    Args:
        search_url: The URL of the Amazon search results page.

    Returns:
        tuple[pd.DataFrame, str]: A DataFrame with the columns Title, Price, Delivery
        and Link sorted by price, plus a recommendation string (empty when no product
        was found). If the page cannot be fetched, the first element is None and the
        string holds the error message.
    """
    base_url = "https://www.amazon.com"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }

    # A timeout keeps the agent step from hanging forever on a stalled connection;
    # network-level failures are reported the same way as a bad status code.
    try:
        response = requests.get(search_url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        return None, f"Failed to retrieve Amazon results: {exc}"
    if response.status_code != 200:
        return None, "Failed to retrieve Amazon results. Amazon may be blocking requests."

    soup = BeautifulSoup(response.text, 'html.parser')
    product_data = []
    for item in soup.select("div[data-asin]"):
        title_tag = item.select_one("h2 a")
        price_tag = item.select_one("span.a-price-whole")
        if title_tag is None or price_tag is None:
            continue

        # The whole-price text usually looks like "1,299." (thousands separators
        # and a trailing decimal point), so normalise both before validating.
        whole = price_tag.text.strip().replace(',', '').rstrip('.')
        if not whole.isdecimal():
            continue  # No usable price: the product cannot be ranked.

        # Cents, when present, live in a sibling span of the whole-price span.
        fraction_tag = price_tag.parent.select_one("span.a-price-fraction")
        fraction = fraction_tag.text.strip() if fraction_tag is not None else ""
        price = float(f"{whole}.{fraction}") if fraction.isdecimal() else float(whole)

        href = title_tag.get("href")
        if not href:
            continue  # A result without a link is not actionable.

        delivery_tag = item.select_one("span.s-align-children-center")
        product_data.append({
            "Title": title_tag.text.strip(),
            "Price": price,
            # NOTE(review): a missing delivery tag is reported as "Free"; confirm
            # that absence of this element really implies free delivery.
            "Delivery": delivery_tag.text.strip() if delivery_tag is not None else "Free",
            # urljoin handles both relative and already-absolute hrefs.
            "Link": urljoin(base_url, href),
        })

    product_data.sort(key=lambda product: product["Price"])

    # Explicit columns keep the schema stable even when nothing was scraped.
    df = pd.DataFrame(product_data, columns=["Title", "Price", "Delivery", "Link"])

    recommendation = ""
    if product_data:
        best_deal = product_data[0]
        recommendation = f"Best deal: {best_deal['Title']} at ${best_deal['Price']} with {best_deal['Delivery']} (Link: {best_deal['Link']})"
    return df, recommendation
# Define the Agent
final_answer = FinalAnswerTool()

# Model configuration handed to the agent below.
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's default locale.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[final_answer, amazon_product_scraper],  # Adding the scraper tool
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Start the Gradio web interface for the agent.
GradioUI(agent).launch()