import requests
from bs4 import BeautifulSoup
import re
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
url = "https://www.deviantart.com/amber2024/gallery"
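
# Pipeline: get_links() drives a Selenium browser to collect folder and
# pagination links from the gallery above, recursion() walks those links and
# gathers every deviation URL, and get_values()/get_tags() then scrape each
# deviation page for favourites, comments, views and its tag list.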
def get_values(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    spans = soup.findAll('span', class_="_3AClx")
    favs = 0
    comments = 0
    views = 0
    #print(spans)
    # Iterate over each span in the list
    c = 0
    for span in spans:
        # Extract the numeric value and the unit (Favourites, Comment, Views)
        print('\n' + str(list(span)) + str(c) + '\n')
        value = str(list(span)[0]).strip('</span>')
        unit = str(list(span)[2]).lstrip('abcdeghijklmnop qrstuvwxyz_1234567890N"=></').rstrip('/span>')
        #print(value)
        # Convert value to numeric format
        if 'K' in value:
            value = float(value[:-1]) * 1000
        else:
            #print(str(len(value))+'val'+value)
            value = int(value)
        print(unit)
        # Check the unit and update the corresponding variable
        if unit == 'Favourites<' or unit == 'Favourite':
            favs = value
        elif unit == 'Comments<' or unit == 'Comment<':
            comments = value
        elif unit == 'Views<' or unit == 'View':
            views = value
        c += 1
    #print(favs,comments,views)
    return (favs, comments, views)
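
# Note: "_3AClx" above and "_1nwad" below are DeviantArt's generated CSS class
# names as of when this script was written; they are likely to change with site
# updates, so both selectors may need refreshing if the scrapers return nothing.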
def get_tags(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    spans = soup.findAll('span', class_="_1nwad")
    tags = []
    #print(spans)
    # Iterate over each span in the list
    for span in spans:
        tags.append(str(span).split('>')[1].split('<')[0])
    print(tags, spans)
    return tags
def get_links(url, page=1):
    service = Service('/Users/osmond/Downloads/chromedriver-mac-arm64/chromedriver')  # Path to chromedriver executable
    driver = webdriver.Chrome(service=service)
    driver.get(url + '?page=' + str(page))
    # Scroll to the bottom of the page
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(0)  # Adjust sleep time as needed
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    links = []
    for link in soup.find_all('a'):
        href = link.get('href')
        if href:  # some anchors have no href; skip them so later substring checks don't fail
            links.append(href)
    #driver.quit()
    return links
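
# Optional, untested tweak: the crawl opens a visible Chrome window; a headless
# run should also work with something along these lines (standard Selenium API):
#   from selenium.webdriver.chrome.options import Options
#   opts = Options()
#   opts.add_argument("--headless")
#   driver = webdriver.Chrome(service=service, options=opts)
# Newer Selenium releases (4.6+) can also locate a matching chromedriver on
# their own, which would make the hard-coded path above unnecessary.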
drawings = []
names = []
def recursion(url):
    global get_links, drawings, names
    recur = []
    cecant = get_links(url)  # tmp
    secant = False   # True once an "/all" (All Deviations) folder link is seen
    cocecant = 1     # highest page number found
    cosecant = []    # page numbers found in pagination links
    for i in cecant:
        if '/all' in i and not '/all?' in i:
            secant = True
            recur.append(i)
        if '?page=' in i:
            cosecant.append(int(i.split('?page=')[1]))
    print(cosecant, 'cosecant')
    recur = list(set(recur))
    try:
        cocecant = max(cosecant)
    except ValueError:
        print('Only One Page')
    print(cocecant, 'cocecant')
    if secant != True:
        # No "All" folder, so fall back to the individual gallery folders
        for i in cecant:
            if "/gallery/" in i:
                recur.append(i)
    print(recur, 'reccc')
    for j in recur:
        cecant = get_links(j)  # tmp
        secant = False
        cocecant = 1
        cosecant = []
        for i in cecant:
            if '/all' in i and not '/all?' in i:
                secant = True
                recur.append(i)
            if '?page=' in i:
                cosecant.append(int(i.split('?page=')[1]))
        recur = list(set(recur))
        print(recur)
        print(cosecant, 'cosc')
        try:
            cocecant = max(cosecant)
        except ValueError:
            print('Only One Page')
        for z in range(1, cocecant + 1):
            print(z)
            x = get_links(j, page=z)
            flag = False
            alled = False  # If there is a folder for All Deviations
            for k in x:
                if '/art' in k:
                    flag = True
                    break
            if flag == True:
                print(x, 'xxxxxxxxx')
                for c in x:
                    if "/art/" in c and not "#comments" in c and not c in drawings:
                        drawings.append(c)
                        names.append(c.split('/art/')[1])
            else:
                # No deviation links on this page: stop paging through this folder
                break
    drawings = list(set(drawings))
#print(get_links(url))
recursion(url)
#print(drawings)
finalle = []
names = []
def recur_works():
    global finalle
    for i in drawings:
        finalle.append(get_values(i))
import threading
drawings = list(set(drawings))
tag_sets = []
# Function to process one item from the drawings list
def process_item(item):
    global tag_sets
    finalle.append(get_values(item))
    names.append(item.split('/art/')[1])
    tag_sets.append(get_tags(item))
# Divide the drawings into chunks for each thread
num_threads = 1
chunk_size = len(drawings) // num_threads if len(drawings) % num_threads == 0 else len(drawings) // num_threads + 1
chunks = [drawings[i:i+chunk_size] for i in range(0, len(drawings), chunk_size)]
# Create and start worker threads
threads = []
for chunk in chunks:
    for drawing in chunk:
        # Create a new thread for each item (or group them per chunk as needed)
        t = threading.Thread(target=process_item, args=(drawing,))
        threads.append(t)
        t.start()
# Wait for all threads to complete
for t in threads:
    t.join()
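
# Observation on the block above: a thread is started for every drawing no
# matter what num_threads is set to, and finalle, names and tag_sets are filled
# by independent appends, so the three lists are only guaranteed to stay
# index-aligned when the workers effectively run one at a time.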
def get_summation():
    print(finalle)
    favs = 0
    comm = 0
    view = 0
    for i in finalle:
        if i != False:
            favs += i[0]
            comm += i[1]
            view += i[2]
    print('favs:', favs, 'comm:', comm, 'view:', view, 'names:', names)
def get_tag_summation():
    post_processed_tags = []
    indexx = []
    for c in range(len(tag_sets)):
        i = tag_sets[c]
        for j in i:
            if j in indexx:
                post_processed_tags[indexx.index(j)][1] = list(post_processed_tags[indexx.index(j)][1])
                post_processed_tags[indexx.index(j)][2] += 1
                post_processed_tags[indexx.index(j)][1][0] += finalle[c][0]
                post_processed_tags[indexx.index(j)][1][1] += finalle[c][1]
                post_processed_tags[indexx.index(j)][1][2] += finalle[c][2]
            else:
                post_processed_tags.append([j, finalle[c], 1])
                indexx.append(j)
    return post_processed_tags
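
# Each entry of the returned list has the form [tag, totals, count], where
# totals accumulates (favourites, comments, views) across every deviation that
# carries the tag. A quick way to list the most-used tags, for example:
#   top = sorted(get_tag_summation(), key=lambda entry: entry[2], reverse=True)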
#recur_works()
get_summation()
e = get_tag_summation()
print(e)