import threading
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

url = "https://www.deviantart.com/amber2024/gallery"

def get_values(url):
    """Return a (favourites, comments, views) tuple scraped from a deviation page."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    # "_3AClx" is DeviantArt's obfuscated class for the stat badges; it may
    # change whenever the site is redeployed.
    spans = soup.find_all('span', class_="_3AClx")
    favs = 0
    comments = 0
    views = 0

    for span in spans:
        # Each badge reads like "1.2K Favourites" once the markup is stripped.
        parts = span.get_text(" ", strip=True).split()
        if len(parts) < 2:
            continue
        value, unit = parts[0], parts[1]

        # Counts of a thousand or more are abbreviated with a "K" suffix.
        if 'K' in value:
            value = float(value[:-1]) * 1000
        else:
            value = int(value)

        if unit in ('Favourites', 'Favourite'):
            favs = value
        elif unit in ('Comments', 'Comment'):
            comments = value
        elif unit in ('Views', 'View'):
            views = value

    return (favs, comments, views)

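# A minimal helper sketch, not part of the original flow: get_values only
# handles the "K" suffix observed on DeviantArt. If the site ever abbreviates
# with "M", a parser like this (parse_count is a hypothetical name) could
# replace the inline suffix logic above.
def parse_count(text):
    """Parse an abbreviated count such as "1.2K" into an int."""
    multipliers = {'K': 1_000, 'M': 1_000_000}
    suffix = text[-1].upper() if text else ''
    if suffix in multipliers:
        return int(float(text[:-1]) * multipliers[suffix])
    return int(text)
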
def get_tags(url):
    """Return the list of tag strings attached to a deviation page."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    # "_1nwad" is the obfuscated class DeviantArt uses for tag links.
    spans = soup.find_all('span', class_="_1nwad")
    tags = [span.get_text(strip=True) for span in spans]
    return tags

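# Hedged sketch: both scrapers above call requests.get once, with no timeout
# or retry. A wrapper like this (fetch is a new name, not in the original
# script) would make them more resilient to transient network errors if wired in.
def fetch(url, retries=3, delay=2):
    """GET a URL with a timeout and simple linear-backoff retries."""
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return response
        except requests.RequestException:
            if attempt == retries - 1:
                raise
            time.sleep(delay * (attempt + 1))
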
def get_links(url, page=1):
    """Open one gallery page in Chrome, scroll until all lazy-loaded
    thumbnails have rendered, and return every href found on the page."""
    # Machine-specific chromedriver path; adjust for your own setup.
    service = Service('/Users/osmond/Downloads/chromedriver-mac-arm64/chromedriver')
    driver = webdriver.Chrome(service=service)
    driver.get(url + '?page=' + str(page))

    # Keep scrolling until the page height stops growing. A zero-second sleep
    # here would break out before any new content had a chance to load.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    driver.quit()

    # Skip anchors without an href so later substring checks never hit None.
    links = [a.get('href') for a in soup.find_all('a') if a.get('href')]
    return links

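# Optional sketch: running Chrome headless avoids popping up a browser window
# for every get_links call. This assumes Selenium 4's Options API and is not
# wired into the script; make_headless_driver is a hypothetical name.
from selenium.webdriver.chrome.options import Options

def make_headless_driver(service):
    options = Options()
    options.add_argument('--headless=new')
    return webdriver.Chrome(service=service, options=options)
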
drawings = []
names = []


def recursion(url):
    """Crawl the gallery: find its 'All' folder (or each sub-gallery folder),
    page through every folder, and collect links to individual deviations."""
    global drawings

    top_links = get_links(url)
    has_all_folder = False
    folders = []

    for link in top_links:
        # The '/all' folder aggregates every deviation in the gallery.
        if '/all' in link and '/all?' not in link:
            has_all_folder = True
            folders.append(link)

    # Without an 'All' folder, fall back to crawling each sub-gallery folder.
    if not has_all_folder:
        for link in top_links:
            if '/gallery/' in link:
                folders.append(link)

    # Worklist crawl: folders discovered along the way are queued as well.
    queue = list(set(folders))
    visited = set()
    while queue:
        folder = queue.pop(0)
        if folder in visited:
            continue
        visited.add(folder)

        folder_links = get_links(folder)
        page_numbers = []
        for link in folder_links:
            if '/all' in link and '/all?' not in link:
                queue.append(link)
            if '?page=' in link:
                page_numbers.append(int(link.split('?page=')[1]))

        # The highest '?page=N' link tells us how many pages the folder has.
        try:
            last_page = max(page_numbers)
        except ValueError:
            last_page = 1
            print('Only one page')

        for page in range(1, last_page + 1):
            page_links = get_links(folder, page=page)

            # A page with no '/art' links at all means we ran past the end.
            if not any('/art' in link for link in page_links):
                break

            for link in page_links:
                if '/art/' in link and '#comments' not in link and link not in drawings:
                    drawings.append(link)
                    names.append(link.split('/art/')[1])

    drawings = list(set(drawings))


recursion(url)

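# Hedged convenience sketch: the Selenium crawl is slow, so caching the
# collected links lets a re-run skip it. The json import, function name, and
# filename are assumptions, not part of the original script.
import json

def save_drawings(path='drawings.json'):
    with open(path, 'w') as f:
        json.dump(drawings, f)
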
finalle = []
names = []


def recur_works():
    """Sequential fallback: fetch stats for every drawing one at a time."""
    global finalle
    for i in drawings:
        finalle.append(get_values(i))


drawings = list(set(drawings))
tag_sets = []

# finalle, names, and tag_sets must stay index-aligned (get_tag_summation
# pairs finalle[c] with tag_sets[c]), so the three appends happen under one
# lock when threads run concurrently.
append_lock = threading.Lock()


def process_item(item):
    values = get_values(item)
    tags = get_tags(item)
    with append_lock:
        finalle.append(values)
        names.append(item.split('/art/')[1])
        tag_sets.append(tags)


# One thread per drawing.
threads = []
for drawing in drawings:
    t = threading.Thread(target=process_item, args=(drawing,))
    threads.append(t)
    t.start()

for t in threads:
    t.join()

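# Hedged alternative sketch: concurrent.futures preserves input order, which
# removes the need for a lock to keep the three result lists aligned. Not
# wired in; process_all is a hypothetical name.
def process_all(items, max_workers=8):
    from concurrent.futures import ThreadPoolExecutor
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        values = list(pool.map(get_values, items))
        tags = list(pool.map(get_tags, items))
    return values, tags
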
def get_summation():
    """Print overall totals for favourites, comments, and views."""
    favs = 0
    comm = 0
    view = 0
    for i in finalle:
        if i:
            favs += i[0]
            comm += i[1]
            view += i[2]
    print('favs:', favs, 'comm:', comm, 'view:', view, 'names:', names)


def get_tag_summation():
    """Aggregate stats per tag as rows of [tag, [favs, comments, views], count]."""
    post_processed_tags = []
    indexx = []  # parallel list of tag names, used to locate each tag's row
    for c in range(len(tag_sets)):
        for tag in tag_sets[c]:
            if tag in indexx:
                row = post_processed_tags[indexx.index(tag)]
                row[1] = list(row[1])  # get_values returns a tuple; make it mutable
                row[2] += 1
                row[1][0] += finalle[c][0]
                row[1][1] += finalle[c][1]
                row[1][2] += finalle[c][2]
            else:
                post_processed_tags.append([tag, finalle[c], 1])
                indexx.append(tag)
    return post_processed_tags
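# Hedged follow-up sketch: rank_tags (a new name) orders the rows returned by
# get_tag_summation by average views per post, which is usually the question
# this aggregation is trying to answer.
def rank_tags(tag_rows):
    return sorted(tag_rows, key=lambda row: row[1][2] / row[2], reverse=True)
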
get_summation()
e = get_tag_summation()
print(e)