Spaces:

te
/

colombia_birds_quiz

Runtime error

colombia_birds_quiz / scrapper2.py

first commit

b152d54 over 1 year ago

1.02 kB

	from selenium import webdriver
	from bs4 import BeautifulSoup

	# Macaulay Library catalog search whose rendered page is scraped for <img> tags.
	url = "https://search.macaulaylibrary.org/catalog?regionCode=CO-ANT&taxonCode=bltfly1&sort=rating_rank_desc&mediaType=photo"

	# Set up the ChromeDriver (you need to have ChromeDriver installed and its path added to your system PATH)
	driver = webdriver.Chrome()

	try:
	    # Load the webpage
	    driver.get(url)

	    # Snapshot the DOM as soon as get() returns. NOTE(review): there is no
	    # explicit wait here, so images injected later by JavaScript may be
	    # missing from this snapshot.
	    page_source = driver.page_source
	finally:
	    # Close the browser window even if loading the page failed, so no
	    # Chrome process is left behind.
	    driver.quit()

	# Parse the HTML content
	soup = BeautifulSoup(page_source, 'html.parser')

	# Collect the src of every <img>; src=True skips tags that have no src
	# attribute, which would otherwise raise KeyError on img['src'].
	image_urls = [img['src'] for img in soup.find_all('img', src=True)]

	# Print the first extracted image URL and the path segment after '/asset/'.
	# The loop variable is deliberately not named `url`, so the page URL above
	# is not overwritten.
	for i, image_url in enumerate(image_urls, start=1):
	    print(f"Image {i}: {image_url}")
	    print(image_url.split('/asset/')[-1].split('/')[0])
	    break  # only the first image is inspected



	def get_image_asset(url: str) -> str:
	    """Return the path segment that follows the last '/asset/' in *url*.

	    The text after the final ``/asset/`` marker is cut at the next ``/``,
	    so ``".../asset/123456/320"`` yields ``"123456"``.

	    If *url* contains no ``/asset/`` marker, the whole string is treated as
	    the tail and its text up to the first ``/`` is returned (for example
	    ``"https:"`` for an absolute URL, or ``""`` for an empty string).
	    """
	    return url.split('/asset/')[-1].split('/')[0]