# BinuraYasodya's picture
# Upload 4 files
# 6c34694 verified
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
import time
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
class AdScraper:
    """Scrape active ads from the Facebook Ad Library for a fixed set of
    telecom companies, clean the results into a pandas DataFrame, and
    upload them to a MySQL table."""

    def __init__(self):
        # Selenium driver; created lazily by setup_driver(), None until then.
        self.driver = None
        # One dict per scraped ad, accumulated across all companies.
        self.ad_details = []
        # Ad Library search URLs keyed by company display name.
        self.companies_urls = {
            "Airtel India": "https://www.facebook.com/ads/library/?active_status=active&ad_type=all&country=ALL&view_all_page_id=147351511955143&sort_data[direction]=desc&sort_data[mode]=relevancy_monthly_grouped&search_type=page&media_type=all",
            "Celcom Malaysia": "https://www.facebook.com/ads/library/?active_status=active&ad_type=all&country=ALL&view_all_page_id=103384636066809&sort_data[direction]=desc&sort_data[mode]=relevancy_monthly_grouped&search_type=page&media_type=all",
            "Vodafone UK": "https://www.facebook.com/ads/library/?active_status=active&ad_type=all&country=ALL&view_all_page_id=67884984384&sort_data[direction]=desc&sort_data[mode]=relevancy_monthly_grouped&search_type=page&media_type=all",
            "T-mobile Polska": "https://www.facebook.com/ads/library/?active_status=active&ad_type=all&country=ALL&view_all_page_id=166466416745074&sort_data[direction]=desc&sort_data[mode]=relevancy_monthly_grouped&search_type=page&media_type=all"
        }

    def setup_driver(self):
        """Start a headless Chrome driver managed by webdriver_manager."""
        options = webdriver.ChromeOptions()
        # FIX: `options.headless = True` was deprecated in Selenium 4.8 and
        # removed later; headless mode must be requested via a Chrome argument.
        options.add_argument("--headless=new")
        self.driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=options,
        )

    def scroll_page(self):
        """Scroll to the bottom repeatedly until the page height stops
        growing, so all lazily-loaded ads are rendered."""
        last_height = self.driver.execute_script("return document.body.scrollHeight")
        while True:
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(6)  # give newly loaded ads time to render
            new_height = self.driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

    def parse_ads(self, soup, company):
        """Extract ad text, status, and artwork URL for every ad card in
        *soup* and append a record per ad to self.ad_details.

        NOTE(review): the CSS class names below are Facebook's auto-generated
        ones and change frequently — expect to have to update them.
        """
        for ad in soup.find_all('div', class_='xh8yej3'):
            # Hoist each find() so the tree is queried once per field,
            # not twice as in the original.
            text_tag = ad.find('div', class_='x6ikm8r x10wlt62')
            ad_text = text_tag.text if text_tag else 'N/A'
            status_tag = ad.find('span', class_='x8t9es0 xw23nyj xo1l8bm x63nzvj x108nfp6 xq9mrsl x1h4wwuj xeuugli x1i64zmx')
            ad_status = status_tag.text if status_tag else 'N/A'
            img_tag = ad.find('img', class_='x1ll5gia x19kjcj4 xh8yej3')
            video_tag = ad.find('video', class_='x1lliihq x5yr21d xh8yej3')
            # Prefer a still image, fall back to a video source.
            if img_tag:
                artwork_link = img_tag['src']
            elif video_tag:
                artwork_link = video_tag['src']
            else:
                artwork_link = 'N/A'
            self.ad_details.append({'Company name': company, 'Ad Text': ad_text,
                                    'Ad status': ad_status, 'Artwork Link': artwork_link})

    def scrape_ads(self):
        """Visit every company URL, scroll to load all ads, and parse them.

        FIX: the driver is now always quit via try/finally — the original
        leaked the browser process if any page load or scroll raised.
        """
        self.setup_driver()
        try:
            for company, url in self.companies_urls.items():
                self.driver.get(url)
                self.scroll_page()
                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                self.parse_ads(soup, company)
        finally:
            self.driver.quit()

    def process_data(self):
        """Clean collected ads: normalise 'N/A' to NaN, drop all-missing
        rows, and de-duplicate by artwork URL.

        Returns:
            pandas.DataFrame: the cleaned ad records.
        """
        # FIX: guard the empty-scrape case — drop_duplicates(subset=...) on a
        # column-less DataFrame raises KeyError.
        if not self.ad_details:
            return pd.DataFrame(columns=['Company name', 'Ad Text', 'Ad status', 'Artwork Link'])
        df = pd.DataFrame(self.ad_details)
        df.replace('N/A', np.nan, inplace=True)
        df.dropna(how='all', inplace=True)
        # NOTE: drop_duplicates treats NaN links as equal, so every ad
        # without artwork collapses to a single row — intentional per original.
        df.drop_duplicates(subset=['Artwork Link'], inplace=True)
        return df

    def upload_data(self, df):
        """Replace the `ads_table` MySQL table with *df*.

        SECURITY NOTE(review): the DB password is hard-coded in the URL;
        it should be read from an environment variable or config file.
        """
        engine = create_engine('mysql+pymysql://root:Binu1997#$@localhost/research_db')
        df.to_sql('ads_table', engine, if_exists='replace', index=False)
        print("Data uploaded successfully!")
def main():
    """Run the full pipeline: scrape, clean, print, and upload."""
    scraper = AdScraper()
    scraper.scrape_ads()
    cleaned = scraper.process_data()
    print("\nDataFrame with duplicates removed:\n", cleaned)
    scraper.upload_data(cleaned)


if __name__ == "__main__":
    main()