imseldrith committed on
Commit
7da0946
·
verified ·
1 Parent(s): 9382654

Delete base.py

Browse files
Files changed (1) hide show
  1. base.py +0 -1067
base.py DELETED
@@ -1,1067 +0,0 @@
1
- import json
2
- import os
3
- import re
4
- import sys
5
- import threading
6
- import time
7
- import traceback
8
- from datetime import datetime, timezone
9
- from decimal import Decimal
10
- from urllib.parse import parse_qs, unquote, urlparse, urlsplit, urlunparse
11
-
12
- import cloudscraper
13
- import requests
14
- import rookiepy
15
- from bs4 import BeautifulSoup as bs
16
-
17
- from colors import fb, fc, fg, flb, flg, fm, fr, fy
18
-
19
# Release tag of this build; check_for_update() compares it (without the
# leading "v") against the tag of the latest GitHub release.
VERSION = "v2.3.2"

# Display name of each supported coupon site -> short code. The short code is
# both the name of the matching Scraper method and the prefix of its per-site
# attributes (e.g. "uf" -> Scraper.uf, uf_data, uf_done, ...).
# Insertion order matters: it is the default scrape order.
scraper_dict: dict = {
    "Udemy Freebies": "uf",
    "Tutorial Bar": "tb",
    "Real Discount": "rd",
    "Course Vania": "cv",
    "IDownloadCoupons": "idc",
    "E-next": "en",
    "Discudemy": "du",
}

# Project URLs.
LINKS = dict(
    github="https://github.com/techtanic/Discounted-Udemy-Course-Enroller",
    support="https://techtanic.github.io/duce/support",
    discord="https://discord.gg/wFsfhJh4Rh",
)
36
-
37
-
38
class LoginException(Exception):
    """Raised when logging in to Udemy fails.

    The single positional argument is the human-readable failure reason.
    """
46
-
47
-
48
class RaisingThread(threading.Thread):
    """Thread that re-raises, from ``join()``, an exception raised by its target.

    ``run()`` captures any ``Exception`` raised by the target instead of
    letting it escape in the worker thread; ``join()`` then raises it in the
    thread that joins.
    """

    # Class-level default so the attribute always exists. Previously it was
    # only created inside run(), so a join(timeout=...) that returned before
    # run() had begun executing failed with AttributeError.
    _exc = None

    def run(self):
        """Run the target and remember any exception it raises."""
        self._exc = None
        try:
            super().run()
        except Exception as e:
            self._exc = e

    def join(self, timeout=None):
        """Wait for the thread, then re-raise the target's exception, if any.

        Args:
            timeout: Forwarded to ``threading.Thread.join``.

        Raises:
            Exception: Whatever the target raised inside ``run()``.
        """
        super().join(timeout=timeout)
        if self._exc:
            raise self._exc
60
-
61
-
62
def resource_path(relative_path):
    """Return the path of a bundled resource.

    When ``sys`` carries a ``_MEIPASS`` attribute the resource is resolved
    against it; otherwise it is resolved against the absolute path of the
    current working directory.
    """
    base_dir = sys._MEIPASS if hasattr(sys, "_MEIPASS") else os.path.abspath(".")
    return os.path.join(base_dir, relative_path)
66
-
67
-
68
- class Scraper:
69
- """
70
- Scrapers: RD,TB, CV, IDC, EN, DU, UF
71
- """
72
-
73
- def __init__(
74
- self,
75
- site_to_scrape: list = list(scraper_dict.keys()),
76
- debug: bool = False,
77
- ):
78
- self.sites = site_to_scrape
79
- self.debug = debug
80
- for site in self.sites:
81
- code_name = scraper_dict[site]
82
- setattr(self, f"{code_name}_length", 0)
83
- setattr(self, f"{code_name}_data", [])
84
- setattr(self, f"{code_name}_done", False)
85
- setattr(self, f"{code_name}_progress", 0)
86
- setattr(self, f"{code_name}_error", "")
87
-
88
- def get_scraped_courses(self, target: object) -> list:
89
- threads = []
90
- scraped_data = {}
91
- for site in self.sites:
92
- t = threading.Thread(
93
- target=target,
94
- args=(site,),
95
- daemon=True,
96
- )
97
- t.start()
98
- threads.append(t)
99
- time.sleep(0.2)
100
- for t in threads:
101
- t.join()
102
- for site in self.sites:
103
- scraped_data[site] = getattr(self, f"{scraper_dict[site]}_data")
104
- return scraped_data
105
-
106
- def append_to_list(self, target: list, title: str, link: str):
107
- target.append((title, link))
108
-
109
- def fetch_page_content(self, url: str, headers: dict = None) -> bytes:
110
- return requests.get(url, headers=headers).content
111
-
112
- def parse_html(self, content: str):
113
- return bs(content, "html5lib")
114
-
115
- def handle_exception(self, site_code: str):
116
- setattr(self, f"{site_code}_error", traceback.format_exc())
117
- setattr(self, f"{site_code}_length", -1)
118
- setattr(self, f"{site_code}_done", True)
119
- if self.debug:
120
- print(getattr(self, f"{site_code}_error"))
121
-
122
- def cleanup_link(self, link: str) -> str:
123
- parsed_url = urlparse(link)
124
-
125
- if parsed_url.netloc == "www.udemy.com":
126
- return link
127
-
128
- if parsed_url.netloc == "click.linksynergy.com":
129
- query_params = parse_qs(parsed_url.query)
130
-
131
- if "RD_PARM1" in query_params:
132
- return unquote(query_params["RD_PARM1"][0])
133
- elif "murl" in query_params:
134
- return unquote(query_params["murl"][0])
135
- else:
136
- return ""
137
- raise ValueError(f"Unknown link format: {link}")
138
-
139
- def du(self):
140
- try:
141
- all_items = []
142
- head = {
143
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36 Edg/92.0.902.84",
144
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
145
- }
146
-
147
- for page in range(1, 4):
148
- content = self.fetch_page_content(
149
- f"https://www.discudemy.com/all/{page}", headers=head
150
- )
151
- soup = self.parse_html(content)
152
- page_items = soup.find_all("a", {"class": "card-header"})
153
- all_items.extend(page_items)
154
- self.du_length = len(all_items)
155
- if self.debug:
156
- print("Length:", self.du_length)
157
- for index, item in enumerate(all_items):
158
- self.du_progress = index
159
- title = item.string
160
- url = item["href"].split("/")[-1]
161
- content = self.fetch_page_content(
162
- f"https://www.discudemy.com/go/{url}", headers=head
163
- )
164
- soup = self.parse_html(content)
165
- link = soup.find("div", {"class": "ui segment"}).a["href"]
166
- if self.debug:
167
- print(title, link)
168
- self.append_to_list(self.du_data, title, link)
169
-
170
- except:
171
- self.handle_exception("du")
172
- self.du_done = True
173
- if self.debug:
174
- print("Return Length:", len(self.du_data))
175
-
176
- def uf(self):
177
- try:
178
- all_items = []
179
- for page in range(1, 4):
180
- content = self.fetch_page_content(
181
- f"https://www.udemyfreebies.com/free-udemy-courses/{page}"
182
- )
183
- soup = self.parse_html(content)
184
- page_items = soup.find_all("a", {"class": "theme-img"})
185
- all_items.extend(page_items)
186
- self.uf_length = len(all_items)
187
- if self.debug:
188
- print("Length:", self.uf_length)
189
- for index, item in enumerate(all_items):
190
- title = item.img["alt"]
191
- link = requests.get(
192
- f"https://www.udemyfreebies.com/out/{item['href'].split('/')[4]}"
193
- ).url
194
- self.append_to_list(self.uf_data, title, link)
195
- self.uf_progress = index
196
-
197
- except:
198
- self.handle_exception("uf")
199
- self.uf_done = True
200
- if self.debug:
201
- print("Return Length:", len(self.uf_data))
202
-
203
- def tb(self):
204
- try:
205
- all_items = []
206
-
207
- for page in range(1, 5):
208
- content = self.fetch_page_content(
209
- f"https://www.tutorialbar.com/all-courses/page/{page}"
210
- )
211
- soup = self.parse_html(content)
212
- page_items = soup.find_all(
213
- "h2", class_="mb15 mt0 font110 mobfont100 fontnormal lineheight20"
214
- )
215
- all_items.extend(page_items)
216
- self.tb_length = len(all_items)
217
- if self.debug:
218
- print("Length:", self.tb_length)
219
-
220
- for index, item in enumerate(all_items):
221
- self.tb_progress = index
222
- title = item.a.string
223
- url = item.a["href"]
224
- content = self.fetch_page_content(url)
225
- soup = self.parse_html(content)
226
- link = soup.find("a", class_="btn_offer_block re_track_btn")["href"]
227
- if "www.udemy.com" in link:
228
- self.append_to_list(self.tb_data, title, link)
229
-
230
- except:
231
- self.handle_exception("tb")
232
- self.tb_done = True
233
- if self.debug:
234
- print("Return Length:", len(self.tb_data))
235
-
236
- def rd(self):
237
- all_items = []
238
-
239
- try:
240
- headers = {
241
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36 Edg/92.0.902.84",
242
- "Host": "cdn.real.discount",
243
- "Connection": "Keep-Alive",
244
- "dnt": "1",
245
- "referer": "https://www.real.discount/",
246
- }
247
- try:
248
- r = requests.get(
249
- "https://cdn.real.discount/api/courses?page=1&limit=500&sortBy=sale_start&store=Udemy&freeOnly=true",
250
- headers=headers,
251
- timeout=(10, 30),
252
- ).json()
253
- except requests.exceptions.Timeout:
254
- self.rd_error = "Timeout"
255
- self.rd_length = -1
256
- self.rd_done = True
257
- return
258
- all_items.extend(r["items"])
259
-
260
- self.rd_length = len(all_items)
261
- if self.debug:
262
- print("Length:", self.rd_length)
263
- for index, item in enumerate(all_items):
264
- self.rd_progress = index
265
- title: str = item["name"]
266
- link: str = item["url"]
267
- link = self.cleanup_link(link)
268
- if link:
269
- self.append_to_list(self.rd_data, title, link)
270
-
271
- except:
272
- self.handle_exception("rd")
273
- if self.debug:
274
- print("Return Length:", len(self.rd_data))
275
- self.rd_done = True
276
-
277
- def cv(self):
278
- try:
279
- content = self.fetch_page_content("https://coursevania.com/courses/")
280
- soup = self.parse_html(content)
281
- try:
282
- nonce = json.loads(
283
- re.search(
284
- r"var stm_lms_nonces = ({.*?});", soup.text, re.DOTALL
285
- ).group(1)
286
- )["load_content"]
287
- if self.debug:
288
- print("Nonce:", nonce)
289
- except IndexError:
290
- self.cv_error = "Nonce not found"
291
- self.cv_length = -1
292
- self.cv_done = True
293
- return
294
- r = requests.get(
295
- "https://coursevania.com/wp-admin/admin-ajax.php?&template=courses/grid&args={%22posts_per_page%22:%2260%22}&action=stm_lms_load_content&nonce="
296
- + nonce
297
- + "&sort=date_high"
298
- ).json()
299
-
300
- soup = self.parse_html(r["content"])
301
- page_items = soup.find_all(
302
- "div", {"class": "stm_lms_courses__single--title"}
303
- )
304
- self.cv_length = len(page_items)
305
- if self.debug:
306
- print("Small Length:", self.cv_length)
307
- for index, item in enumerate(page_items):
308
- self.cv_progress = index
309
- title = item.h5.string
310
- content = self.fetch_page_content(item.a["href"])
311
- soup = self.parse_html(content)
312
- link = soup.find(
313
- "a",
314
- {"class": "masterstudy-button-affiliate__link"},
315
- )["href"]
316
- self.append_to_list(self.cv_data, title, link)
317
-
318
- except:
319
- self.handle_exception("cv")
320
- self.cv_done = True
321
- if self.debug:
322
- print("Return Length:", len(self.cv_data))
323
-
324
- def idc(self):
325
- try:
326
- all_items = []
327
- for page in range(1, 5):
328
- content = self.fetch_page_content(
329
- f"https://idownloadcoupon.com/product-category/udemy/page/{page}"
330
- )
331
- soup = self.parse_html(content)
332
- page_items = soup.find_all(
333
- "a",
334
- attrs={
335
- "class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"
336
- },
337
- )
338
- all_items.extend(page_items)
339
- self.idc_length = len(all_items)
340
- if self.debug:
341
- print("Length:", self.idc_length)
342
- for index, item in enumerate(all_items):
343
- self.idc_progress = index
344
- title = item.h2.string
345
- link_num = item["href"].split("/")[4]
346
- if link_num == "85":
347
- continue
348
- link = f"https://idownloadcoupon.com/udemy/{link_num}/"
349
-
350
- r = requests.get(
351
- link,
352
- allow_redirects=False,
353
- )
354
- link = unquote(r.headers["Location"])
355
- link = self.cleanup_link(link)
356
- self.append_to_list(self.idc_data, title, link)
357
-
358
- except:
359
- self.handle_exception("idc")
360
- self.idc_done = True
361
- if self.debug:
362
- print("Return Length:", len(self.idc_data))
363
-
364
- def en(self):
365
- try:
366
- all_items = []
367
- for page in range(1, 6):
368
- content = self.fetch_page_content(
369
- f"https://jobs.e-next.in/course/udemy/{page}"
370
- )
371
- soup = self.parse_html(content)
372
- page_items = soup.find_all(
373
- "a", {"class": "btn btn-secondary btn-sm btn-block"}
374
- )
375
- all_items.extend(page_items)
376
-
377
- self.en_length = len(all_items)
378
-
379
- if self.debug:
380
- print("Length:", self.en_length)
381
- for index, item in enumerate(all_items):
382
- self.en_progress = index
383
- content = self.fetch_page_content(item["href"])
384
- soup = self.parse_html(content)
385
- title = soup.find("h3").string.strip()
386
- link = soup.find("a", {"class": "btn btn-primary"})["href"]
387
- self.append_to_list(self.en_data, title, link)
388
-
389
- except:
390
- self.handle_exception("en")
391
- self.en_done = True
392
- if self.debug:
393
- print("Return Length:", len(self.en_data))
394
- print(self.en_data)
395
-
396
-
397
- class Udemy:
398
- def __init__(self, interface: str, debug: bool = False):
399
- self.interface = interface
400
- # self.client = cloudscraper.CloudScraper()
401
- self.client = requests.session()
402
- headers = {
403
- "User-Agent": "okhttp/4.9.2 UdemyAndroid 8.9.2(499) (phone)",
404
- "Accept": "application/json, text/plain, */*",
405
- "Accept-Language": "en-GB,en;q=0.5",
406
- "Referer": "https://www.udemy.com/",
407
- "X-Requested-With": "XMLHttpRequest",
408
- "DNT": "1",
409
- "Connection": "keep-alive",
410
- "Sec-Fetch-Dest": "empty",
411
- "Sec-Fetch-Mode": "cors",
412
- "Sec-Fetch-Site": "same-origin",
413
- "Pragma": "no-cache",
414
- "Cache-Control": "no-cache",
415
- }
416
-
417
- self.client.headers.update(headers)
418
- self.debug = debug
419
-
420
- def print(self, content: str, color: str = "red", **kargs):
421
- content = str(content)
422
- colours_dict = {
423
- "yellow": fy,
424
- "red": fr,
425
- "blue": fb,
426
- "light blue": flb,
427
- "green": fg,
428
- "light green": flg,
429
- "cyan": fc,
430
- "magenta": fm,
431
- }
432
- if self.interface == "gui":
433
- self.window["out"].print(content, text_color=color, **kargs)
434
- else:
435
- print(colours_dict[color] + content, **kargs)
436
-
437
- def get_date_from_utc(self, d: str):
438
- utc_dt = datetime.strptime(d, "%Y-%m-%dT%H:%M:%SZ")
439
- dt = utc_dt.replace(tzinfo=timezone.utc).astimezone(tz=None)
440
- return dt.strftime("%B %d, %Y")
441
-
442
- def get_now_to_utc(self):
443
- return datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
444
-
445
- def load_settings(self):
446
- try:
447
- with open(f"duce-{self.interface}-settings.json") as f:
448
- self.settings = json.load(f)
449
- except FileNotFoundError:
450
- with open(
451
- resource_path(f"default-duce-{self.interface}-settings.json")
452
- ) as f:
453
- self.settings = json.load(f)
454
- if (
455
- self.interface == "cli" and "use_browser_cookies" not in self.settings
456
- ): # v2.1
457
- self.settings.get("use_browser_cookies", False)
458
- # v2.2
459
- if "course_update_threshold_months" not in self.settings:
460
- self.settings["course_update_threshold_months"] = 24 # 2 years
461
-
462
- self.settings["languages"] = dict(
463
- sorted(self.settings["languages"].items(), key=lambda item: item[0])
464
- )
465
- self.save_settings()
466
- self.title_exclude = "\n".join(self.settings["title_exclude"])
467
- self.instructor_exclude = "\n".join(self.settings["instructor_exclude"])
468
-
469
- def save_settings(self):
470
- with open(f"duce-{self.interface}-settings.json", "w") as f:
471
- json.dump(self.settings, f, indent=4)
472
-
473
- def make_cookies(self, client_id: str, access_token: str, csrf_token: str):
474
- self.cookie_dict = dict(
475
- client_id=client_id,
476
- access_token=access_token,
477
- csrf_token=csrf_token,
478
- )
479
-
480
- def fetch_cookies(self):
481
- """Gets cookies from browser
482
- Sets cookies_dict, cookie_jar
483
- """
484
- cookies = rookiepy.to_cookiejar(rookiepy.load(["www.udemy.com"]))
485
- self.cookie_dict: dict = requests.utils.dict_from_cookiejar(cookies)
486
- self.cookie_jar = cookies
487
-
488
- def get_enrolled_courses(self):
489
- """Get enrolled courses
490
- Sets enrolled_courses
491
-
492
- {slug:enrollment_time}
493
- """
494
- next_page = "https://www.udemy.com/api-2.0/users/me/subscribed-courses/?ordering=-enroll_time&fields[course]=enrollment_time,url&page_size=100"
495
- courses = {}
496
- while next_page:
497
- r = self.client.get(
498
- next_page,
499
- ).json()
500
- for course in r["results"]:
501
- slug = course["url"].split("/")[2]
502
- courses[slug] = course["enrollment_time"]
503
- next_page = r["next"]
504
- self.enrolled_courses = courses
505
-
506
- def compare_versions(self, version1, version2):
507
- v1_parts = list(map(int, version1.split(".")))
508
- v2_parts = list(map(int, version2.split(".")))
509
- max_length = max(len(v1_parts), len(v2_parts))
510
- v1_parts.extend([0] * (max_length - len(v1_parts)))
511
- v2_parts.extend([0] * (max_length - len(v2_parts)))
512
-
513
- for v1, v2 in zip(v1_parts, v2_parts):
514
- if v1 < v2:
515
- return -1
516
- elif v1 > v2:
517
- return 1
518
- return 0
519
-
520
- def check_for_update(self) -> tuple[str, str]:
521
- r_version = (
522
- requests.get(
523
- "https://api.github.com/repos/techtanic/Discounted-Udemy-Course-Enroller/releases/latest"
524
- )
525
- .json()["tag_name"]
526
- .removeprefix("v")
527
- )
528
- c_version = VERSION.removeprefix("v")
529
-
530
- comparison = self.compare_versions(c_version, r_version)
531
-
532
- if comparison == -1:
533
- return (
534
- f"Update {r_version} Available",
535
- f"Update {r_version} Available",
536
- )
537
- elif comparison == 0:
538
- return (
539
- f"Login {c_version}",
540
- f"Discounted-Udemy-Course-Enroller {c_version}",
541
- )
542
- else:
543
- return (
544
- f"Dev Login {c_version}",
545
- f"Dev Discounted-Udemy-Course-Enroller {c_version}",
546
- )
547
-
548
- def manual_login(self, email: str, password: str):
549
- """Manual Login to Udemy using email and password and sets cookies
550
- Args:
551
- email (str): Email
552
- password (str): Password
553
- Raises:
554
- LoginException: Login Error
555
- """
556
- # s = cloudscraper.CloudScraper()
557
-
558
- s = requests.session()
559
- r = s.get(
560
- "https://www.udemy.com/join/signup-popup/?locale=en_US&response_type=html&next=https%3A%2F%2Fwww.udemy.com%2Flogout%2F",
561
- headers={"User-Agent": "okhttp/4.9.2 UdemyAndroid 8.9.2(499) (phone)"},
562
- # headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0",
563
- # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
564
- # 'Accept-Language': 'en-US,en;q=0.5',
565
- # #'Accept-Encoding': 'gzip, deflate, br',
566
- # 'DNT': '1',
567
- # 'Connection': 'keep-alive',
568
- # 'Upgrade-Insecure-Requests': '1',
569
- # 'Sec-Fetch-Dest': 'document',
570
- # 'Sec-Fetch-Mode': 'navigate',
571
- # 'Sec-Fetch-Site': 'none',
572
- # 'Sec-Fetch-User': '?1',
573
- # 'Pragma': 'no-cache',
574
- # 'Cache-Control': 'no-cache'},
575
- )
576
- try:
577
- csrf_token = r.cookies["csrftoken"]
578
- except:
579
- if self.debug:
580
- print(r.text)
581
- data = {
582
- "csrfmiddlewaretoken": csrf_token,
583
- "locale": "en_US",
584
- "email": email,
585
- "password": password,
586
- }
587
-
588
- # ss = requests.session()
589
- s.cookies.update(r.cookies)
590
- s.headers.update(
591
- {
592
- "User-Agent": "okhttp/4.9.2 UdemyAndroid 8.9.2(499) (phone)",
593
- "Accept": "application/json, text/plain, */*",
594
- "Accept-Language": "en-GB,en;q=0.5",
595
- "Referer": "https://www.udemy.com/join/login-popup/?passwordredirect=True&response_type=json",
596
- "Origin": "https://www.udemy.com",
597
- "DNT": "1",
598
- "Host": "www.udemy.com",
599
- "Connection": "keep-alive",
600
- "Sec-Fetch-Dest": "empty",
601
- "Sec-Fetch-Mode": "cors",
602
- "Sec-Fetch-Site": "same-origin",
603
- "Pragma": "no-cache",
604
- "Cache-Control": "no-cache",
605
- }
606
- )
607
- s = cloudscraper.create_scraper(sess=s)
608
- r = s.post(
609
- "https://www.udemy.com/join/login-popup/?passwordredirect=True&response_type=json",
610
- data=data,
611
- allow_redirects=False,
612
- )
613
- if r.text.__contains__("returnUrl"):
614
- self.make_cookies(
615
- r.cookies["client_id"], r.cookies["access_token"], csrf_token
616
- )
617
- else:
618
- login_error = r.json()["error"]["data"]["formErrors"][0]
619
- if login_error[0] == "Y":
620
- raise LoginException("Too many logins per hour try later")
621
- elif login_error[0] == "T":
622
- raise LoginException("Email or password incorrect")
623
- else:
624
- raise LoginException(login_error)
625
-
626
- def get_session_info(self):
627
- """Get Session info
628
- Sets Client Session, currency and name
629
- """
630
- s = cloudscraper.CloudScraper()
631
- # headers = {
632
- # "authorization": "Bearer " + self.cookie_dict["access_token"],
633
- # "accept": "application/json, text/plain, */*",
634
- # "x-requested-with": "XMLHttpRequest",
635
- # "x-forwarded-for": str(
636
- # ".".join(map(str, (random.randint(0, 255) for _ in range(4))))
637
- # ),
638
- # "x-udemy-authorization": "Bearer " + self.cookie_dict["access_token"],
639
- # "content-type": "application/json;charset=UTF-8",
640
- # "origin": "https://www.udemy.com",
641
- # "referer": "https://www.udemy.com/",
642
- # "dnt": "1",
643
- # "User-Agent": "okhttp/4.9.2 UdemyAndroid 8.9.2(499) (phone)",
644
- # }
645
-
646
- headers = {
647
- "User-Agent": "okhttp/4.9.2 UdemyAndroid 8.9.2(499) (phone)",
648
- "Accept": "application/json, text/plain, */*",
649
- "Accept-Language": "en-GB,en;q=0.5",
650
- "Referer": "https://www.udemy.com/",
651
- "X-Requested-With": "XMLHttpRequest",
652
- "DNT": "1",
653
- "Connection": "keep-alive",
654
- "Sec-Fetch-Dest": "empty",
655
- "Sec-Fetch-Mode": "cors",
656
- "Sec-Fetch-Site": "same-origin",
657
- "Pragma": "no-cache",
658
- "Cache-Control": "no-cache",
659
- }
660
- r = s.get(
661
- "https://www.udemy.com/api-2.0/contexts/me/?header=True",
662
- cookies=self.cookie_dict,
663
- headers=headers,
664
- )
665
- r = r.json()
666
- if self.debug:
667
- print(r)
668
- if not r["header"]["isLoggedIn"]:
669
- raise LoginException("Login Failed")
670
-
671
- self.display_name: str = r["header"]["user"]["display_name"]
672
- r = s.get(
673
- "https://www.udemy.com/api-2.0/shopping-carts/me/",
674
- headers=headers,
675
- cookies=self.cookie_dict,
676
- )
677
- r = r.json()
678
- self.currency: str = r["user"]["credit"]["currency_code"]
679
-
680
- s = cloudscraper.CloudScraper()
681
- s.cookies.update(self.cookie_dict)
682
- s.headers.update(headers)
683
- s.keep_alive = False
684
- self.client = s
685
- self.get_enrolled_courses()
686
-
687
- def is_keyword_excluded(self, title: str) -> bool:
688
- title_words = title.casefold().split()
689
- for word in title_words:
690
- word = word.casefold()
691
- if word in self.title_exclude:
692
- return True
693
- return False
694
-
695
- def is_instructor_excluded(self, instructors: list) -> bool:
696
- for instructor in instructors:
697
- if instructor in self.settings["instructor_exclude"]:
698
- return True
699
- return False
700
-
701
- def is_course_updated(self, last_update: str | None) -> bool:
702
- if not last_update:
703
- return True
704
- current_date = datetime.now()
705
- last_update_date = datetime.strptime(last_update, "%Y-%m-%d")
706
- # Calculate the difference in years and months
707
- years = current_date.year - last_update_date.year
708
- months = current_date.month - last_update_date.month
709
- days = current_date.day - last_update_date.day
710
-
711
- # Adjust the months and years if necessary
712
- if days < 0:
713
- months -= 1
714
-
715
- if months < 0:
716
- years -= 1
717
- months += 12
718
-
719
- # Calculate the total month difference
720
- month_diff = years * 12 + months
721
- return month_diff < self.settings["course_update_threshold_months"]
722
-
723
- def is_user_dumb(self) -> bool:
724
- self.sites = [key for key, value in self.settings["sites"].items() if value]
725
- self.categories = [
726
- key for key, value in self.settings["categories"].items() if value
727
- ]
728
- self.languages = [
729
- key for key, value in self.settings["languages"].items() if value
730
- ]
731
- self.instructor_exclude = self.settings["instructor_exclude"]
732
- self.title_exclude = self.settings["title_exclude"]
733
- self.min_rating = self.settings["min_rating"]
734
- return not all([bool(self.sites), bool(self.categories), bool(self.languages)])
735
-
736
- def save_course(self):
737
- if self.settings["save_txt"]:
738
- self.txt_file.write(f"{self.title} - {self.link}\n")
739
- self.txt_file.flush()
740
- os.fsync(self.txt_file.fileno())
741
-
742
- def remove_duplicate_courses(self):
743
- existing_links = set()
744
- new_data = {}
745
- for key, courses in self.scraped_data.items():
746
- new_data[key] = []
747
- for title, link in courses:
748
- link = self.normalize_link(link)
749
- if link not in existing_links:
750
- new_data[key].append((title, link))
751
- existing_links.add(link)
752
- self.scraped_data = {k: v for k, v in new_data.items() if v}
753
-
754
- def normalize_link(self, link):
755
- parsed_url = urlparse(link)
756
- path = (
757
- parsed_url.path if parsed_url.path.endswith("/") else parsed_url.path + "/"
758
- )
759
- return urlunparse(
760
- (
761
- parsed_url.scheme,
762
- parsed_url.netloc,
763
- path,
764
- parsed_url.params,
765
- parsed_url.query,
766
- parsed_url.fragment,
767
- )
768
- )
769
-
770
- def get_course_id(self, url):
771
- course = {
772
- "course_id": None,
773
- "url": url,
774
- "is_invalid": False,
775
- "is_free": None,
776
- "is_excluded": None,
777
- "retry": None,
778
- "msg": "Report to developer",
779
- }
780
- url = re.sub(r"\W+$", "", unquote(url))
781
- try:
782
- r = self.client.get(url)
783
- except requests.exceptions.ConnectionError:
784
- if self.debug:
785
- print(r.text)
786
- course["retry"] = True
787
- return course
788
- course["url"] = r.url
789
- soup = bs(r.content, "html5lib")
790
-
791
- course_id = soup.find("body").get("data-clp-course-id", "invalid")
792
-
793
- if course_id == "invalid":
794
- course["is_invalid"] = True
795
- course["msg"] = "Course ID not found: Report to developer"
796
- return course
797
- course["course_id"] = course_id
798
- dma = json.loads(soup.find("body")["data-module-args"])
799
- if self.debug:
800
- with open("debug/dma.json", "w") as f:
801
- json.dump(dma, f, indent=4)
802
-
803
- if dma.get("view_restriction"):
804
- course["is_invalid"] = True
805
- course["msg"] = dma["serverSideProps"]["limitedAccess"]["errorMessage"][
806
- "title"
807
- ]
808
- return course
809
-
810
- course["is_free"] = not dma["serverSideProps"]["course"].get("isPaid", True)
811
- if not self.debug and self.is_course_excluded(dma):
812
- course["is_excluded"] = True
813
- return course
814
-
815
- return course
816
-
817
- def is_course_excluded(self, dma):
818
- instructors = [
819
- i["absolute_url"].split("/")[-2]
820
- for i in dma["serverSideProps"]["course"]["instructors"]["instructors_info"]
821
- if i["absolute_url"]
822
- ]
823
- lang = dma["serverSideProps"]["course"]["localeSimpleEnglishTitle"]
824
- cat = dma["serverSideProps"]["topicMenu"]["breadcrumbs"][0]["title"]
825
- rating = dma["serverSideProps"]["course"]["rating"]
826
- last_update = dma["serverSideProps"]["course"]["lastUpdateDate"]
827
-
828
- if not self.is_course_updated(last_update):
829
- self.print(
830
- f"Course excluded: Last updated {last_update}", color="light blue"
831
- )
832
- elif self.is_instructor_excluded(instructors):
833
- self.print(f"Instructor excluded: {instructors[0]}", color="light blue")
834
- elif self.is_keyword_excluded(self.title):
835
- self.print("Keyword Excluded", color="light blue")
836
- elif cat not in self.categories:
837
- self.print(f"Category excluded: {cat}", color="light blue")
838
- elif lang not in self.languages:
839
- self.print(f"Language excluded: {lang}", color="light blue")
840
- elif rating < self.min_rating:
841
- self.print(f"Low rating: {rating}", color="light blue")
842
- else:
843
- return False
844
- return True
845
-
846
- def extract_course_coupon(self, url):
847
- params = parse_qs(urlsplit(url).query)
848
- return params.get("couponCode", [False])[0]
849
-
850
- def check_course(self, course_id, coupon_code=None):
851
- url = f"https://www.udemy.com/api-2.0/course-landing-components/{course_id}/me/?components=purchase"
852
- if coupon_code:
853
- url += f",redeem_coupon&couponCode={coupon_code}"
854
-
855
- r = self.client.get(url).json()
856
- if self.debug:
857
- with open("test/check_course.json", "w") as f:
858
- json.dump(r, f, indent=4)
859
- amount = (
860
- r.get("purchase", {})
861
- .get("data", {})
862
- .get("list_price", {})
863
- .get("amount", "retry")
864
- )
865
- coupon_valid = False
866
-
867
- if coupon_code and "redeem_coupon" in r:
868
- discount = r["purchase"]["data"]["pricing_result"]["discount_percent"]
869
- status = r["redeem_coupon"]["discount_attempts"][0]["status"]
870
- coupon_valid = discount == 100 and status == "applied"
871
-
872
- return Decimal(amount), coupon_valid
873
-
874
- def start_enrolling(self):
875
- self.remove_duplicate_courses()
876
- self.initialize_counters()
877
- self.setup_txt_file()
878
-
879
- total_courses = sum(len(courses) for courses in self.scraped_data.values())
880
- previous_courses_count = 0
881
- for site_index, (site, courses) in enumerate(self.scraped_data.items()):
882
- self.print(f"\nSite: {site} [{len(courses)}]", color="cyan")
883
-
884
- for index, (title, link) in enumerate(courses):
885
- self.title = title
886
- self.link = link
887
- self.print_course_info(previous_courses_count + index, total_courses)
888
- self.handle_course_enrollment()
889
- previous_courses_count += len(courses)
890
-
891
- def initialize_counters(self):
892
- self.successfully_enrolled_c = 0
893
- self.already_enrolled_c = 0
894
- self.expired_c = 0
895
- self.excluded_c = 0
896
- self.amount_saved_c = 0
897
-
898
- def setup_txt_file(self):
899
- if self.settings["save_txt"]:
900
- os.makedirs("Courses/", exist_ok=True)
901
- self.txt_file = open(
902
- f"Courses/{time.strftime('%Y-%m-%d--%H-%M')}.txt", "w", encoding="utf-8"
903
- )
904
-
905
- def print_course_info(self, index, total_courses):
906
- self.print(f"[{index + 1} / {total_courses}] ", color="magenta", end=" ")
907
- self.print(self.title, color="yellow", end=" ")
908
- self.print(self.link, color="blue")
909
-
910
- def handle_course_enrollment(self):
911
- slug = self.link.split("/")[4]
912
-
913
- if slug in self.enrolled_courses:
914
- self.print(
915
- f"You purchased this course on {self.get_date_from_utc(self.enrolled_courses[slug])}",
916
- color="light blue",
917
- )
918
- self.already_enrolled_c += 1
919
- return
920
-
921
- course = self.get_course_id(self.link)
922
- if course["is_invalid"]:
923
- self.print(course["msg"], color="red")
924
- self.excluded_c += 1
925
- elif course["retry"]:
926
- self.print("Retrying...", color="red")
927
- time.sleep(1)
928
- self.handle_course_enrollment()
929
- elif course["is_excluded"]:
930
- self.excluded_c += 1
931
- elif course["is_free"]:
932
- self.handle_free_course(course["course_id"])
933
- elif not course["is_free"]:
934
- self.handle_discounted_course(course["course_id"])
935
- else:
936
- self.print("Unknown Error: Report this link to the developer", color="red")
937
- self.excluded_c += 1
938
-
939
- def handle_free_course(self, course_id):
940
- if self.settings["discounted_only"]:
941
- self.print("Free course excluded", color="light blue")
942
- self.excluded_c += 1
943
- else:
944
- success = self.free_checkout(course_id)
945
- if success:
946
- self.print("Successfully Subscribed", color="green")
947
- self.successfully_enrolled_c += 1
948
- self.save_course()
949
- else:
950
- self.print(
951
- "Unknown Error: Report this link to the developer", color="red"
952
- )
953
- self.expired_c += 1
954
-
955
- def discounted_checkout(self, coupon, course_id) -> dict:
956
- payload = {
957
- "checkout_environment": "Marketplace",
958
- "checkout_event": "Submit",
959
- "payment_info": {
960
- "method_id": "0",
961
- "payment_method": "free-method",
962
- "payment_vendor": "Free",
963
- },
964
- "shopping_info": {
965
- "items": [
966
- {
967
- "buyable": {"id": course_id, "type": "course"},
968
- "discountInfo": {"code": coupon},
969
- "price": {"amount": 0, "currency": self.currency.upper()},
970
- }
971
- ],
972
- "is_cart": False,
973
- },
974
- }
975
- headers = {
976
- "User-Agent": "okhttp/4.9.2 UdemyAndroid 8.9.2(499) (phone)",
977
- "Accept": "application/json, text/plain, */*",
978
- "Accept-Language": "en-US",
979
- "Referer": f"https://www.udemy.com/payment/checkout/express/course/{course_id}/?discountCode={coupon}",
980
- "Content-Type": "application/json",
981
- "X-Requested-With": "XMLHttpRequest",
982
- "x-checkout-is-mobile-app": "false",
983
- "Origin": "https://www.udemy.com",
984
- "DNT": "1",
985
- "Sec-GPC": "1",
986
- "Connection": "keep-alive",
987
- "Sec-Fetch-Dest": "empty",
988
- "Sec-Fetch-Mode": "cors",
989
- "Sec-Fetch-Site": "same-origin",
990
- "Priority": "u=0",
991
- }
992
- # csrftoken = None
993
- # for cookie in self.client.cookies:
994
- # if cookie.name == "csrftoken":
995
- # csrftoken = cookie.value
996
- # break
997
-
998
- # if csrftoken:
999
- # headers["X-CSRFToken"] = csrftoken
1000
- # else:
1001
- # raise ValueError("CSRF token not found")
1002
-
1003
- r = self.client.post(
1004
- "https://www.udemy.com/payment/checkout-submit/",
1005
- json=payload,
1006
- headers=headers,
1007
- )
1008
- try:
1009
- r = r.json()
1010
- except:
1011
- self.print(r.text, color="red")
1012
- self.print("Unknown Error: Report this to the developer", color="red")
1013
- return {"status": "failed", "message": "Unknown Error"}
1014
- return r
1015
-
1016
- def free_checkout(self, course_id):
1017
- self.client.get(f"https://www.udemy.com/course/subscribe/?courseId={course_id}")
1018
- r = self.client.get(
1019
- f"https://www.udemy.com/api-2.0/users/me/subscribed-courses/{course_id}/?fields%5Bcourse%5D=%40default%2Cbuyable_object_type%2Cprimary_subcategory%2Cis_private"
1020
- ).json()
1021
- return r.get("_class") == "course"
1022
-
1023
- def handle_discounted_course(self, course_id):
1024
- coupon_code = self.extract_course_coupon(self.link)
1025
- amount, coupon_valid = self.check_course(course_id, coupon_code)
1026
- if amount == "retry":
1027
- self.print("Retrying...", color="red")
1028
- time.sleep(1)
1029
- self.handle_discounted_course(course_id)
1030
- elif coupon_valid: # elif coupon_code and coupon_valid:
1031
- self.process_coupon(course_id, coupon_code, amount)
1032
- else:
1033
- self.print("Coupon Expired", color="red")
1034
- self.expired_c += 1
1035
-
1036
- def process_coupon(self, course_id, coupon_code, amount):
1037
- checkout_response = self.discounted_checkout(coupon_code, course_id)
1038
- if msg := checkout_response.get("detail"):
1039
- self.print(msg, color="red")
1040
- try:
1041
- wait_time = int(re.search(r"\d+", checkout_response["detail"]).group(0))
1042
- except:
1043
- self.print(
1044
- "Unknown Error: Report this link to the developer", color="red"
1045
- )
1046
- self.print(checkout_response, color="red")
1047
- wait_time = 60
1048
- time.sleep(wait_time + 1.5)
1049
- self.process_coupon(course_id, coupon_code, amount)
1050
- elif checkout_response["status"] == "succeeded":
1051
- self.print("Successfully Enrolled To Course :)", color="green")
1052
- self.successfully_enrolled_c += 1
1053
- self.enrolled_courses[course_id] = self.get_now_to_utc()
1054
- self.amount_saved_c += amount
1055
- self.save_course()
1056
- time.sleep(3.8)
1057
- elif checkout_response["status"] == "failed":
1058
- message = checkout_response["message"]
1059
- if "item_already_subscribed" in message:
1060
- self.print("Already Enrolled", color="light blue")
1061
- self.already_enrolled_c += 1
1062
- else:
1063
- self.print("Unknown Error: Report this to the developer", color="red")
1064
- self.print(checkout_response, color="red")
1065
- else:
1066
- self.print("Unknown Error: Report this to the developer", color="red")
1067
- self.print(checkout_response, color="red")