import gradio as gr
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import random
import time
import os
import subprocess
import chromedriver_autoinstaller
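
# Pipeline: install Chrome on first use -> load the listing page in headless
# Selenium -> parse the rendered HTML with BeautifulSoup -> write the rows to
# CSV -> expose the whole thing through a small Gradio UI.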

# Desktop user-agent strings; get_driver() applies one at random per session.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
]


def install_chrome():
    """Install Google Chrome via apt/dpkg.

    Assumes a Debian/Ubuntu host with root access (e.g. a container that
    ships without a browser); raises if any install step fails.
    """
    try:
        # Chrome's runtime dependencies.
        subprocess.run("apt-get update", shell=True, check=True)
        subprocess.run(
            "apt-get install -y libxss1 libappindicator1 libindicator7 fonts-liberation libnss3 xdg-utils unzip",
            shell=True, check=True,
        )

        # Fetch the stable Chrome package; if dpkg reports missing
        # dependencies, `apt-get install -f` resolves them.
        subprocess.run(
            "wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb",
            shell=True, check=True,
        )
        subprocess.run(
            "dpkg -i google-chrome-stable_current_amd64.deb || apt-get install -f -y",
            shell=True, check=True,
        )
    except Exception as e:
        raise RuntimeError(f"Failed to install Chrome: {e}") from e


def get_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # Selenium takes no `headers` argument, so the spoofed user agent has to
    # be set through Chrome's own command-line flag.
    chrome_options.add_argument(f"--user-agent={random.choice(USER_AGENTS)}")
    return webdriver.Chrome(options=chrome_options)
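

# Optional refinement, not wired in by default: an explicit wait that returns
# as soon as result cards render, instead of the fixed time.sleep(5) in
# scrape_flipkart(). A minimal sketch; it assumes Flipkart's "_1AtVbE" card
# class is still current.
def wait_for_results(driver, timeout=10):
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, "_1AtVbE"))
    )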


def scrape_flipkart(url):
    try:
        # First run on a bare machine: install the browser before driving it.
        if not os.path.exists("/usr/bin/google-chrome"):
            install_chrome()
        # Runs outside the conditional so a matching chromedriver is on PATH
        # even when Chrome was already present.
        chromedriver_autoinstaller.install()

        driver = get_driver()
        try:
            driver.get(url)
            time.sleep(5)  # crude wait for the JavaScript-rendered results
            soup = BeautifulSoup(driver.page_source, "html.parser")
        finally:
            driver.quit()  # release the browser even if the page load fails

        products = []
        prices = []
        ratings = []

        # Flipkart's obfuscated class names (_1AtVbE, _4rR01T, _30jeq3,
        # _3LWZlK) change periodically; update them here if the scrape starts
        # returning zero rows.
        items = soup.find_all("div", class_="_1AtVbE")
        for item in items:
            name_tag = item.find("div", class_="_4rR01T")
            name = name_tag.text.strip() if name_tag else "N/A"

            price_tag = item.find("div", class_="_30jeq3")
            price = price_tag.text.strip() if price_tag else "N/A"

            rating_tag = item.find("div", class_="_3LWZlK")
            rating = rating_tag.text.strip() if rating_tag else "N/A"

            # "_1AtVbE" also matches layout wrappers, so keep only rows that
            # actually carry a product name.
            if name != "N/A":
                products.append(name)
                prices.append(price)
                ratings.append(rating)

        df = pd.DataFrame({
            "Product Name": products,
            "Price": prices,
            "Rating": ratings,
        })

        csv_path = "flipkart_laptops.csv"
        df.to_csv(csv_path, index=False, encoding="utf-8")

        return f"Scraped {len(products)} laptops successfully!", csv_path

    except Exception as e:
        return f"Error: {e}", None


with gr.Blocks(title="Flipkart Laptop Scraper") as demo:
    gr.Markdown("# Flipkart Laptop Scraper")
    gr.Markdown("Enter a Flipkart laptop category URL to scrape the listings and download them as a CSV.")

    url_input = gr.Textbox(label="Flipkart URL", placeholder="e.g., https://www.flipkart.com/laptops/pr?sid=6bo,b5g")
    scrape_btn = gr.Button("Scrape Data")
    output_text = gr.Textbox(label="Status")
    output_file = gr.File(label="Download CSV")

    # The scraper returns (status message, CSV path), matching the two outputs.
    scrape_btn.click(
        fn=scrape_flipkart,
        inputs=url_input,
        outputs=[output_text, output_file],
    )

demo.launch()
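
# Hosting note: inside a container you may need to bind all interfaces,
# e.g. demo.launch(server_name="0.0.0.0", server_port=7860).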