Spaces:
No application file
No application file
# import os | |
# css_file = "newFile.css" | |
# print(f'file:{os.getcwd()}{css_file}') | |
import requests | |
from bs4 import BeautifulSoup | |
import shutil | |
def download_images(url: str, selector: str, timeout: float = 10.0) -> None:
    """Print the ``data-src`` of the first image inside a selected element.

    Fetches ``url``, parses the returned HTML and locates one container
    element:

    * a selector starting with ``.`` is treated as a class selector and is
      matched against ``div`` elements only (``div<selector>``);
    * anything else is treated as an element id, with an optional leading
      ``#`` removed.

    The ``data-src`` attribute of the first ``<img>`` found inside that
    container is printed. This function itself writes nothing to disk.

    Args:
        url: Address of the page to fetch.
        selector: ``.class-name`` or ``#element-id`` (the ``#`` is optional).
        timeout: Passed to ``requests.get`` as its ``timeout`` (seconds) so
            an unresponsive server cannot block the call indefinitely.

    Returns:
        None. Results and problems are reported with ``print``.
    """
    selector = selector.strip()
    if not selector:
        # Checked before any network traffic: indexing an empty selector
        # would otherwise fail with an IndexError.
        print('Selector must not be empty')
        return

    # The whole body is read right away via ``response.text``, so the
    # request does not need to be streamed.
    response = requests.get(
        url,
        headers={'User-agent': 'Mozilla/6.0'},
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, 'html.parser')

    if selector.startswith('.'):
        container = soup.select_one(f'div{selector}')
    else:
        # Only drop a leading '#'; a bare id such as "main" is used as-is.
        element_id = selector[1:] if selector.startswith('#') else selector
        container = soup.find(id=element_id)

    if container is None:
        # Both lookups return None when nothing matches.
        print(f'No element matches selector {selector!r}')
        return

    img_tags = container.find_all('img')
    try:
        print(img_tags[0]['data-src'])
    except (IndexError, KeyError) as e:
        # IndexError: the container holds no <img>;
        # KeyError: the first <img> has no data-src attribute.
        print(e)
# if(img_tags[0]['data-src']): | |
# print(img_tags[0]['data-src']) | |
# else: | |
# print('Yo') | |
# for img in img_tags: | |
# # Split URL by the last forward slash
# split_url = img['src'].rsplit('/', 1) | |
# file_name_with_query = split_url[1] | |
# # Search for the file extension | |
# extensions = ['.jpg', '.png', '.webp'] | |
# img_name = file_name_with_query | |
# for extension in extensions: | |
# if extension in file_name_with_query: | |
# img_name = file_name_with_query.split(extension)[0] + extension | |
# break | |
# r = requests.get(img['src'], stream=True, headers={'User-agent': 'Mozilla/6.0'}) | |
# if r.status_code == 200: | |
# with open(img_name, 'wb') as f: | |
# r.raw.decode_content = True | |
# shutil.copyfileobj(r.raw, f) | |
# else: | |
# print(r.status_code) | |
# try: | |
# urlretrieve(img['src'], img_name) | |
# # print(f"Downloaded {img_name}") | |
# except HTTPError: | |
# urlretrieve(img_url, img_name) | |
# # except Exception as e: | |
# # print(f"Failed to download {img_name}: {str(e)}") | |
# Ask for the page address first, then the container selector, and run the
# lookup with both answers.
url, selector = input("URL: "), input("Selector: ")
download_images(url, selector)
# # Usage example | |
# download_images('https://www.elixirsolutions.com/', '.columncontainer')