StandardCAS-NSTID committed on
Commit
d42b500
·
verified ·
1 Parent(s): 3578bc4

Create Other Builds/Counter/1a1a.py

Browse files
Files changed (1) hide show
  1. Other Builds/Counter/1a1a.py +238 -0
Other Builds/Counter/1a1a.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import re
4
+ import time
5
+ from selenium import webdriver
6
+ from selenium.webdriver.common.keys import Keys
7
+ from selenium.webdriver.chrome.service import Service
8
+
9
# Root gallery page that recursion() starts crawling from.
url = "https://www.deviantart.com/amber2024/gallery"
10
+
11
+
12
+
13
+
14
def _node_text(node):
    """Return the text of a BeautifulSoup child node (a tag or a bare string)."""
    get_text = getattr(node, 'get_text', None)
    return get_text() if callable(get_text) else str(node)


def _parse_count(text):
    """Convert a counter label such as "12", "1,234" or "1.2K" to a number.

    Plain values are returned as ``int``. Values carrying a ``K`` or ``M``
    suffix are returned as ``float`` (``"1.2K"`` -> ``1200.0``).

    Raises:
        ValueError: if *text* is not a recognisable number.
    """
    cleaned = text.strip().replace(',', '')
    multipliers = {'K': 1000, 'M': 1000000}
    suffix = cleaned[-1:].upper()
    if suffix in multipliers:
        return float(cleaned[:-1]) * multipliers[suffix]
    return int(cleaned)


def get_values(url):
    """Read the favourite / comment / view counters from the page at *url*.

    Every ``<span class="_3AClx">`` is expected to hold the number as its
    first child and the unit label ("Favourites", "Comment", "Views", ...)
    as its third child. Spans with fewer than three children, or whose
    number cannot be parsed, are skipped.

    Args:
        url: address of the page to download.

    Returns:
        A ``(favs, comments, views)`` tuple. A counter that is not found on
        the page is reported as 0.

    Raises:
        requests.RequestException: if the page cannot be downloaded within
            the timeout.
    """
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    spans = soup.find_all('span', class_="_3AClx")

    counters = {'favourite': 0, 'comment': 0, 'view': 0}
    for c, span in enumerate(spans):
        children = list(span)
        print('\n'+str(children)+str(c)+'\n')
        if len(children) < 3:
            continue
        try:
            value = _parse_count(_node_text(children[0]))
        except ValueError:
            # Not a counter span after all; ignore it instead of aborting.
            continue

        # Normalise "Favourites"/"Favourite" etc. to one singular,
        # lower-case key so both spellings update the same counter.
        unit = _node_text(children[2]).strip().lower().rstrip('s')
        print(unit)
        if unit in counters:
            counters[unit] = value

    return (counters['favourite'], counters['comment'], counters['view'])
48
+
49
def get_tags(url):
    """Return the text of every ``<span class="_1nwad">`` on the page at *url*.

    Args:
        url: address of the page to download.

    Returns:
        A list of strings, one per matching span, in document order.

    Raises:
        requests.RequestException: if the page cannot be downloaded within
            the timeout.
    """
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    spans = soup.find_all('span', class_="_1nwad")
    # get_text() reads the element's text directly; slicing the serialized
    # HTML on '>' and '<' returned an empty string for nested markup.
    tags = [span.get_text() for span in spans]
    print(tags,spans)
    return tags
61
+
62
+
63
def get_links(url,page=1,scroll_pause=0):
    """Open ``url?page=<page>`` in Chrome and return the link targets found.

    The page is scrolled to the bottom repeatedly until its height stops
    changing, then the rendered HTML is parsed.

    Args:
        url: page address without a ``?page=`` query.
        page: page number appended as ``?page=<page>``.
        scroll_pause: seconds to wait after each scroll before the page
            height is measured again. The default of 0 keeps the original
            behaviour of not waiting.

    Returns:
        The ``href`` value of every ``<a>`` element that has one, in
        document order. Anchors without an ``href`` are left out, so the
        list never contains ``None``.
    """
    service = Service('/Users/osmond/Downloads/chromedriver-mac-arm64/chromedriver')  # Path to chromedriver executable
    driver = webdriver.Chrome(service=service)
    try:
        driver.get(url+'?page='+str(page))

        # Scroll to the bottom of the page until the height stops growing.
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_pause)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        page_source = driver.page_source
    finally:
        # Every call starts its own browser; close it even when loading
        # fails so browser processes do not pile up.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    return [anchor['href'] for anchor in soup.find_all('a', href=True)]
86
+
87
+
88
+
89
# Module-level accumulators filled in by recursion():
#   drawings - links containing "/art/" that have been collected
#   names    - the part of each collected link that follows "/art/"
drawings, names = [], []
91
+
92
def _scan_folder_links(links):
    """Pick out the "all" links and the page numbers present in *links*.

    Returns:
        An ``(all_links, page_numbers)`` pair: the links that contain
        ``/all`` but not ``/all?``, and the integer N of every link that
        contains ``?page=N``.
    """
    all_links = []
    page_numbers = []
    for link in links:
        if '/all' in link and '/all?' not in link:
            all_links.append(link)
        # A regex tolerates text after the number (e.g. "&x=1" or "#top"),
        # which int() on the raw remainder would reject.
        found = re.search(r'\?page=(\d+)', link)
        if found:
            page_numbers.append(int(found.group(1)))
    return all_links, page_numbers


def recursion(url):
    """Collect every "/art/" link reachable from the gallery page *url*.

    The links on *url* are scanned first. If any of them contains ``/all``
    (and not ``/all?``), only those links are visited; otherwise every link
    containing ``/gallery/`` is visited. Each visited folder is walked page
    by page (the highest ``?page=N`` found on its first page is the page
    count) and links that contain ``/art/`` but not ``#comments`` are
    recorded. Paging of a folder stops at the first page without any
    ``/art`` link. "all" links discovered inside a folder are queued too;
    each folder is visited at most once.

    Side effects:
        Appends new links to the global ``drawings`` and the text after
        ``/art/`` to the global ``names``, then rebinds ``drawings`` to a
        de-duplicated list (order not preserved). Progress is printed.

    Args:
        url: address of the gallery root page.
    """
    global drawings, names

    # Drop None/empty hrefs up front; the substring tests below need strings.
    root_links = [link for link in get_links(url) if link]
    all_links, page_numbers = _scan_folder_links(root_links)
    print(page_numbers,'cosecant')
    if not page_numbers:
        print('Only One Page')
    print(max(page_numbers, default=1),'cocecant')

    if all_links:
        folders = all_links
    else:
        folders = [link for link in root_links if "/gallery/" in link]
    # dict.fromkeys removes duplicates while keeping first-seen order.
    pending = list(dict.fromkeys(folders))
    print(pending,'reccc')

    known = set(drawings)   # O(1) "already collected?" checks
    visited = set()         # folders already walked
    while pending:
        folder = pending.pop(0)
        if folder in visited:
            continue
        visited.add(folder)

        first_page = [link for link in get_links(folder) if link]
        nested_all, page_numbers = _scan_folder_links(first_page)
        pending.extend(link for link in nested_all if link not in visited)
        print(pending)
        print(page_numbers,'cosc')
        if not page_numbers:
            print('Only One Page')
        last_page = max(page_numbers, default=1)

        for page in range(1, last_page + 1):
            print(page)
            if page == 1:
                # get_links(folder) already loaded page 1; reuse it.
                page_links = first_page
            else:
                page_links = [link for link in get_links(folder, page=page) if link]

            if not any('/art' in link for link in page_links):
                break

            print(page_links,'xxxxxxxxx')
            for link in page_links:
                if "/art/" in link and "#comments" not in link and link not in known:
                    known.add(link)
                    drawings.append(link)
                    names.append(link.split('/art/')[1])

    drawings = list(set(drawings))
161
+
162
# Crawl the gallery rooted at `url`; recursion() fills the global `drawings`.
recursion(url)

# Result lists start out empty here. Rebinding `names` discards whatever
# recursion() appended to it; process_item() appends to it again.
finalle, names = [], []
168
def recur_works():
    """Append get_values(link) to the global ``finalle`` for each drawing.

    Links are taken from the global ``drawings`` list and processed one
    after another in the calling thread. The only call to this function
    visible in this file is commented out.
    """
    for drawing_url in drawings:
        stats = get_values(drawing_url)
        finalle.append(stats)
172
+
173
+ import threading
174
+
175
# Remove duplicate links; going through a set does not preserve order.
drawings = [*set(drawings)]
# One list of tags per processed drawing, appended by process_item().
tag_sets = []
177
# Guards the three result lists so that one drawing's entries are appended
# together; get_tag_summation() pairs tag_sets[i] with finalle[i] by index.
_results_lock = threading.Lock()


def process_item(item):
    """Fetch counters, name and tags for one drawing and record them.

    All three results are computed first and only then appended, under a
    lock, to the global ``finalle``, ``names`` and ``tag_sets`` lists. If a
    download fails nothing is appended, so the lists keep the same length
    and the same index always refers to the same drawing even when several
    threads run this function at once.

    Args:
        item: link of the drawing; must contain ``/art/``.
    """
    values = get_values(item)
    name = item.split('/art/')[1]
    tags = get_tags(item)
    with _results_lock:
        finalle.append(values)
        names.append(name)
        tag_sets.append(tags)
183
+
184
# Divide the drawings into chunks of (at most) len(drawings) / num_threads.
num_threads = 1
# Ceiling division, clamped to 1: with an empty `drawings` list a chunk size
# of 0 would make range() below raise "arg 3 must not be zero".
chunk_size = max(1, -(-len(drawings) // num_threads))
chunks = [drawings[i:i+chunk_size] for i in range(0, len(drawings), chunk_size)]

# Create and start worker threads.
# NOTE: one thread is started per drawing, so num_threads only affects how
# the drawings are grouped, not how many threads run.
threads = []
for chunk in chunks:
    for drawing in chunk:
        t = threading.Thread(target=process_item, args=(drawing,))
        threads.append(t)
        t.start()

# Wait for all threads to complete
for t in threads:
    t.join()
201
+
202
+
203
def get_summation():
    """Print the collected results and their column totals.

    Prints the global ``finalle`` list, then one line with the sums of the
    first, second and third element of every entry (labelled favs, comm and
    view) followed by the global ``names`` list. Entries that compare equal
    to ``False`` are left out of the sums. Returns nothing.
    """
    print(finalle)
    counted = [entry for entry in finalle if entry != False]
    favs = sum(entry[0] for entry in counted)
    comm = sum(entry[1] for entry in counted)
    view = sum(entry[2] for entry in counted)
    print('favs:',favs,'comm:',comm,'view:',view, 'names:', names)
214
+
215
def get_tag_summation():
    """Aggregate the per-drawing counters in ``finalle`` by tag.

    ``tag_sets[i]`` is paired with ``finalle[i]``; every tag in the former
    is credited with the three counters of the latter.

    Returns:
        A list of ``[tag, counters, occurrences]`` entries in first-seen
        order. For a tag seen once, ``counters`` is the very object taken
        from ``finalle``; once a tag has been seen again it is a list
        holding the element-wise sums.

    Raises:
        IndexError: if a non-empty tag list has no matching ``finalle``
            entry.
    """
    post_processed_tags = []
    position = {}  # tag -> index of its entry in post_processed_tags
    for c, tags in enumerate(tag_sets):
        for tag in tags:
            stats = finalle[c]
            slot = position.get(tag)
            if slot is None:
                position[tag] = len(post_processed_tags)
                post_processed_tags.append([tag, stats, 1])
                continue

            entry = post_processed_tags[slot]
            # Copy before adding: the stored counters may be a tuple, or
            # the same object that still lives in finalle.
            totals = list(entry[1])
            totals[0] += stats[0]
            totals[1] += stats[1]
            totals[2] += stats[2]
            entry[1] = totals
            entry[2] += 1
    return post_processed_tags
232
+
233
# recur_works() is left disabled here; the threads started above have
# already filled `finalle`.

# Print the overall totals first, then the per-tag aggregation.
get_summation()
e = get_tag_summation()

print(e)
238
+