sagar007 committed on
Commit
ddd5812
·
verified ·
1 Parent(s): 99362db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -3
app.py CHANGED
@@ -2,13 +2,14 @@ import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
5
- import chromedriver_autoinstaller
6
  from selenium import webdriver
7
  from selenium.webdriver.chrome.options import Options
8
  import csv
9
  import random
10
  import time
11
  import os
 
 
12
 
13
  # List of user agents to avoid bot detection
14
  USER_AGENTS = [
@@ -17,7 +18,27 @@ USER_AGENTS = [
17
  "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
18
  ]
19
 
20
- # Function to initialize Selenium driver (headless)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def get_driver():
22
  chrome_options = Options()
23
  chrome_options.add_argument("--headless") # Run in headless mode
@@ -29,13 +50,17 @@ def get_driver():
29
  # Function to scrape Flipkart laptop data
30
  def scrape_flipkart(url):
31
  try:
 
 
 
 
32
  # Set up Selenium driver
33
  driver = get_driver()
34
  headers = {"User-Agent": random.choice(USER_AGENTS)}
35
 
36
  # Load the page
37
  driver.get(url)
38
- time.sleep(5) # Wait for JavaScript to load content
39
 
40
  # Get page source and parse with BeautifulSoup
41
  soup = BeautifulSoup(driver.page_source, "html.parser")
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
 
5
  from selenium import webdriver
6
  from selenium.webdriver.chrome.options import Options
7
  import csv
8
  import random
9
  import time
10
  import os
11
+ import subprocess
12
+ import chromedriver_autoinstaller
13
 
14
  # List of user agents to avoid bot detection
15
  USER_AGENTS = [
 
18
  "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
19
  ]
20
 
21
def install_chrome():
    """Install Google Chrome, its system libraries, and a matching ChromeDriver.

    Intended for ephemeral container hosts (e.g. Hugging Face Spaces) where
    Chrome is not preinstalled. Requires root privileges for apt-get/dpkg.

    Raises:
        RuntimeError: if any installation step fails; the original error is
            attached as the cause via exception chaining.
    """
    deb = "google-chrome-stable_current_amd64.deb"
    try:
        # Refresh the package index, then install Chrome's runtime
        # dependencies. Argument lists (shell=False) avoid shell quoting
        # and injection pitfalls of string commands.
        subprocess.run(["apt-get", "update"], check=True)
        subprocess.run(
            [
                "apt-get", "install", "-y",
                "libxss1", "libappindicator1", "libindicator7",
                "fonts-liberation", "libnss3", "xdg-utils", "unzip",
            ],
            check=True,
        )
        # Download the official stable Chrome package.
        subprocess.run(
            ["wget", "-q", f"https://dl.google.com/linux/direct/{deb}"],
            check=True,
        )
        # dpkg exits non-zero when dependencies are missing; in that case
        # `apt-get install -f` pulls them in and finishes configuration
        # (explicit fallback instead of a shell `||` one-liner).
        if subprocess.run(["dpkg", "-i", deb]).returncode != 0:
            subprocess.run(["apt-get", "install", "-f", "-y"], check=True)
        # Install the ChromeDriver version matching the installed Chrome.
        chromedriver_autoinstaller.install()
    except Exception as e:
        # Keep the user-facing message format, but chain the cause so the
        # underlying failure is preserved for debugging.
        raise RuntimeError(f"Failed to install Chrome: {e}") from e
40
+
41
+ # Function to initialize Selenium driver
42
  def get_driver():
43
  chrome_options = Options()
44
  chrome_options.add_argument("--headless") # Run in headless mode
 
50
  # Function to scrape Flipkart laptop data
51
  def scrape_flipkart(url):
52
  try:
53
+ # Ensure Chrome is installed
54
+ if not os.path.exists("/usr/bin/google-chrome"):
55
+ install_chrome()
56
+
57
  # Set up Selenium driver
58
  driver = get_driver()
59
  headers = {"User-Agent": random.choice(USER_AGENTS)}
60
 
61
  # Load the page
62
  driver.get(url)
63
+ time.sleep(5) # Wait for JavaScript to load
64
 
65
  # Get page source and parse with BeautifulSoup
66
  soup = BeautifulSoup(driver.page_source, "html.parser")