rkwyu committed on
Commit
58293b7
·
1 Parent(s): 3a25a20

Support everand podcast

Browse files
README.md CHANGED
@@ -5,7 +5,9 @@
5
  </a>
6
 
7
  ## About ##
8
- Scribd-dl helps downloading documents on [scribd.com](https://www.scribd.com/) and [slideshare.net](https://www.slideshare.net/) without membership / sign-in.
 
 
9
 
10
  ## Prerequisites ##
11
  To use Scribd-dl, you need to install [Node.js](https://nodejs.org/en/download/). It is recommended that you use the latest LTS version available.
@@ -39,40 +41,52 @@ rendertime=100
39
  [DIRECTORY]
40
  output=output
41
  ```
42
- `rendertime` is the waiting time in millisecond for single page rendering in [scribd.com](https://www.scribd.com/), it is only applicable for `default` mode.
43
  `output` is the output directory for generated .pdf files.
44
 
45
  ## Usage (CLI) ##
46
  ```console
47
  Usage: npm start [options] url
48
  Options:
49
- /i image-based: generated by image snapshots taken for pages in scribd.com
50
  ```
51
 
52
- #### Example 1: Download 《The Minds of Billy Milligan》 ####
53
  ```console
54
  npm start "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
55
  ```
56
 
57
- #### Example 2: Download 《The Minds of Billy Milligan》 using `image-based` method ####
58
  ```console
59
  npm start /i "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
60
  ```
61
 
62
- #### Example 3: Download 《Everything You Need To Know About ChatGPT》 ####
63
  ```console
64
  npm start "https://www.slideshare.net/slideshow/everything-you-need-to-know-about-chatgpt-8ba3/266783915"
65
  ```
66
 
 
 
 
 
 
 
 
 
 
 
67
  ## Support URL Format ##
68
  - https://www.scribd.com/doc/**
69
  - https://www.scribd.com/embeds/**
70
  - https://www.slideshare.net/**
71
  - https://www.slideshare.net/slideshow/**
 
 
 
72
 
73
  ## Development Plan ##
74
 
75
- - Support [everand.com](https://www.everand.com/)
76
  - Scribd obfuscates the .pdf files, so text copied from the documents may come out garbled. De-obfuscation is one of the future plans.
77
 
78
  ## License ##
 
5
  </a>
6
 
7
  ## About ##
8
+ Scribd-dl helps downloading:
9
+ - documents on [scribd.com](https://www.scribd.com/) and [slideshare.net](https://www.slideshare.net/) without membership / sign-in
10
+ - podcast audios on [everand.com](https://www.everand.com/podcasts)
11
 
12
  ## Prerequisites ##
13
  To use Scribd-dl, you need to install [Node.js](https://nodejs.org/en/download/). It is recommended that you use the latest LTS version available.
 
41
  [DIRECTORY]
42
  output=output
43
  ```
44
+ `rendertime` is the waiting time in milliseconds for rendering a single page on [scribd.com](https://www.scribd.com/); it is only applicable to `default` mode.
45
  `output` is the output directory for generated .pdf files.
46
 
47
  ## Usage (CLI) ##
48
  ```console
49
  Usage: npm start [options] url
50
  Options:
51
+ /i image-based: generated by image snapshots taken for pages on scribd.com
52
  ```
53
 
54
+ #### Example 1: Download 《The Minds of Billy Milligan》 on scribd.com ####
55
  ```console
56
  npm start "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
57
  ```
58
 
59
+ #### Example 2: Download 《The Minds of Billy Milligan》 using `image-based` method on scribd.com ####
60
  ```console
61
  npm start /i "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
62
  ```
63
 
64
+ #### Example 3: Download 《Everything You Need To Know About ChatGPT》 on slideshare.net ####
65
  ```console
66
  npm start "https://www.slideshare.net/slideshow/everything-you-need-to-know-about-chatgpt-8ba3/266783915"
67
  ```
68
 
69
+ #### Example 4: Download all 《TED Talks Daily》 episodes on everand.com ####
70
+ ```console
71
+ npm start "https://www.everand.com/podcast-show/414106971/TED-Talks-Daily"
72
+ ```
73
+
74
+ #### Example 5: Download 《Sunday Pick: How to care for the people who take care of us (w/ Ai-jen Poo)》 on everand.com ####
75
+ ```console
76
+ npm start "https://www.everand.com/listen/podcast/731670963"
77
+ ```
78
+
79
  ## Support URL Format ##
80
  - https://www.scribd.com/doc/**
81
  - https://www.scribd.com/embeds/**
82
  - https://www.slideshare.net/**
83
  - https://www.slideshare.net/slideshow/**
84
+ - https://www.everand.com/podcast-show/**
85
+ - https://www.everand.com/podcast/**
86
+ - https://www.everand.com/listen/podcast/**
87
 
88
  ## Development Plan ##
89
 
 
90
  - Scribd obfuscates the .pdf files, so text copied from the documents may come out garbled. De-obfuscation is one of the future plans.
91
 
92
  ## License ##
run.js CHANGED
@@ -18,6 +18,6 @@ if (process.argv.length >= 3) {
18
  console.error(`
19
  Usage: npm start [options] url
20
  Options:
21
- /i image-based: generated by image snapshots taken for pages
22
  `)
23
  }
 
18
  console.error(`
19
  Usage: npm start [options] url
20
  Options:
21
+ /i image-based: generated by image snapshots taken for pages on scribd.com
22
  `)
23
  }
src/App.js CHANGED
@@ -1,7 +1,9 @@
1
  import { scribdDownloader } from "./service/ScribdDownloader.js"
2
  import { slideshareDownloader } from "./service/SlideshareDownloader.js"
 
3
  import * as scribdRegex from "./const/ScribdRegex.js"
4
  import * as slideshareRegex from "./const/SlideshareRegex.js"
 
5
 
6
  class App {
7
  constructor() {
@@ -16,6 +18,8 @@ class App {
16
  await scribdDownloader.execute(url, flag)
17
  } else if (url.match(slideshareRegex.DOMAIN)) {
18
  await slideshareDownloader.execute(url)
 
 
19
  } else {
20
  throw new Error(`Unsupported URL: ${url}`)
21
  }
 
1
  import { scribdDownloader } from "./service/ScribdDownloader.js"
2
  import { slideshareDownloader } from "./service/SlideshareDownloader.js"
3
+ import { everandDownloader } from "./service/EverandDownloader.js"
4
  import * as scribdRegex from "./const/ScribdRegex.js"
5
  import * as slideshareRegex from "./const/SlideshareRegex.js"
6
+ import * as everandRegex from "./const/EverandRegex.js"
7
 
8
  class App {
9
  constructor() {
 
18
  await scribdDownloader.execute(url, flag)
19
  } else if (url.match(slideshareRegex.DOMAIN)) {
20
  await slideshareDownloader.execute(url)
21
+ } else if (url.match(everandRegex.DOMAIN)) {
22
+ await everandDownloader.execute(url)
23
  } else {
24
  throw new Error(`Unsupported URL: ${url}`)
25
  }
src/const/EverandRegex.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
// URL patterns for everand.com podcast pages.
// Capture group 1 of every PODCAST_* pattern is the numeric id; group 2
// (series / episode only) is the URL slug.

// Any https://www.everand.com URL.
const DOMAIN = /^https:\/\/www\.everand\.com/
// Podcast series page: /podcast-show/<seriesId>/<slug>
// The slug class is [a-zA-Z0-9_-]; the range must be A-Z (not A-z, which
// would also accept the punctuation characters between 'Z' and 'a').
const PODCAST_SERIES = /^https:\/\/www\.everand\.com\/podcast-show\/([0-9]+)\/([a-zA-Z0-9_-]+)/
// Podcast episode landing page: /podcast/<episodeId>/<slug>
const PODCAST_EPISODE = /^https:\/\/www\.everand\.com\/podcast\/([0-9]+)\/([a-zA-Z0-9_-]+)/
// Podcast episode player page: /listen/podcast/<episodeId>
const PODCAST_LISTEN = /^https:\/\/www\.everand\.com\/listen\/podcast\/([0-9]+)/

export { DOMAIN, PODCAST_SERIES, PODCAST_EPISODE, PODCAST_LISTEN }
src/service/EverandDownloader.js ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cliProgress from "cli-progress"
2
+ import { puppeteerSg } from "../utils/request/PuppeteerSg.js";
3
+ import { pdfGenerator } from "../utils/io/PdfGenerator.js";
4
+ import { configLoader } from "../utils/io/ConfigLoader.js";
5
+ import { directoryIo } from "../utils/io/DirectoryIo.js"
6
+ import * as everandRegex from "../const/EverandRegex.js"
7
+ import { Image } from "../object/Image.js"
8
+ import sharp from "sharp";
9
+ import axios from "axios";
10
+ import fs from "fs"
11
+
12
+
13
+ const output = configLoader.load("DIRECTORY", "output")
14
+
15
/**
 * Make a string safe to use as a single file name component by replacing
 * path separators and other characters that are reserved on common file
 * systems (\ / : * ? " < > |) with an underscore.
 *
 * @param {string} name - Raw name, e.g. an episode title.
 * @returns {string} The name with every reserved character replaced by "_".
 */
function toSafeFileName(name) {
    return String(name).replace(/[\\\/:*?"<>|]/g, "_")
}

/**
 * Write a readable stream to a file and wait until the file is fully flushed.
 *
 * @param {NodeJS.ReadableStream} readable - Source stream.
 * @param {string} path - Destination file path.
 * @returns {Promise<void>} Resolves on the writer's "finish" event; rejects
 *   if either the source or the file stream emits "error".
 */
function saveStream(readable, path) {
    return new Promise((resolve, reject) => {
        const writer = fs.createWriteStream(path)
        readable.on("error", (err) => {
            // pipe() does not close the destination when the source fails
            writer.destroy()
            reject(err)
        })
        writer.on("error", reject)
        writer.on("finish", resolve)
        readable.pipe(writer)
    })
}

/**
 * Downloads podcast audio from everand.com, either a single episode or
 * every episode of a series. The constructor always returns the same
 * instance.
 */
class EverandDownloader {
    constructor() {
        if (!EverandDownloader.instance) {
            EverandDownloader.instance = this
        }
        return EverandDownloader.instance
    }

    /**
     * Dispatch a URL to the series or single-episode downloader.
     *
     * @param {string} url - An everand.com podcast-show, podcast or listen/podcast URL.
     * @throws {Error} When the URL matches none of the supported patterns.
     */
    async execute(url) {
        if (url.match(everandRegex.PODCAST_SERIES)) {
            await this.series(url)
        } else if (url.match(everandRegex.PODCAST_EPISODE)) {
            // episode landing pages are downloaded through their player page
            const episodeId = everandRegex.PODCAST_EPISODE.exec(url)[1]
            await this.listen(`https://www.everand.com/listen/podcast/${episodeId}`)
        } else if (url.match(everandRegex.PODCAST_LISTEN)) {
            await this.listen(url)
        } else {
            throw new Error(`Unsupported URL: ${url}`)
        }
    }

    /**
     * Download the audio of one episode into `<output>/<seriesId>/<episodeId>_<title>.mp3`.
     *
     * @param {string} url - A listen/podcast URL.
     * @param {boolean} [isEpisode=true] - True when called for a standalone
     *   episode: a one-step progress bar is shown and the shared browser is
     *   closed afterwards. Pass false when the caller manages both.
     */
    async listen(url, isEpisode = true) {
        const episodeId = everandRegex.PODCAST_LISTEN.exec(url)[1]
        const bar = isEpisode ? new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic) : null
        let barStarted = false
        let page

        try {
            // navigate to everand
            page = await puppeteerSg.getPage(url)

            // wait rendering
            await new Promise(resolve => setTimeout(resolve, 1000))

            // get title, audio-url, series-url (callbacks run inside the page; `Scribd` is a page global)
            const title = await page.evaluate(() => Scribd.current_doc.short_title)
            const audioUrl = await page.evaluate(() => document.querySelector('audio#audioplayer').src)
            const seriesUrl = await page.evaluate(() => document.querySelector('a[href^="https://www.everand.com/podcast-show/"]').href)

            // prepare output dir
            const seriesId = everandRegex.PODCAST_SERIES.exec(seriesUrl)[1]
            const dir = `${output}/${seriesId}`
            await directoryIo.create(dir)

            // download audio
            if (bar) {
                bar.start(1, 0)
                barStarted = true
            }
            // titles may contain "/" and similar characters, which must not end up in the path
            const path = `${dir}/${episodeId}_${toSafeFileName(title)}.mp3`
            const resp = await axios.get(audioUrl, { responseType: 'stream' })
            // wait for the file to be completely written before reporting progress or closing the browser
            await saveStream(resp.data, path)
            if (bar) {
                bar.update(1)
            }
        } finally {
            if (barStarted) {
                bar.stop()
            }
            try {
                if (page) {
                    await page.close()
                }
            } finally {
                if (isEpisode) {
                    await puppeteerSg.close()
                }
            }
        }
    }

    /**
     * Download every episode of a podcast series by walking all of its
     * paginated episode listings.
     *
     * @param {string} url - A podcast-show URL.
     */
    async series(url) {
        const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
        let barStarted = false
        let page

        try {
            // navigate to everand
            page = await puppeteerSg.getPage(url)

            // wait rendering
            await new Promise(resolve => setTimeout(resolve, 1000))

            // get number-of-episodes
            const totalEpisode = await page.evaluate(() => parseInt(document.querySelector('span[data-e2e="podcast-series-header-total-episodes"]').textContent.replace("episodes", "").trim(), 10))

            // get pages; a series without pagination links is treated as a single page
            const totalPage = await page.evaluate(() => {
                const last = [...document.querySelectorAll('div[data-e2e="pagination"] a[aria-label^="Page"]')].at(-1)
                return last ? parseInt(last.textContent, 10) : 1
            })

            bar.start(totalEpisode, 0)
            barStarted = true

            let downloaded = 0
            for (let i = 1; i <= totalPage; i++) {
                const pageUrl = new URL(url)
                pageUrl.searchParams.set("page", String(i))
                pageUrl.searchParams.set("sort", "desc")
                await page.goto(pageUrl.href, { waitUntil: "load" })
                await new Promise(resolve => setTimeout(resolve, 1000))

                const episodes = await page.evaluate(() => [...document.querySelectorAll('div.breakpoint_hide.below a[data-e2e="podcast-episode-player-button"]')].map(x => x.href))
                for (const episodeUrl of episodes) {
                    await this.listen(episodeUrl, false)
                    // count actual downloads instead of assuming a fixed number of episodes per page
                    downloaded += 1
                    bar.update(downloaded)
                }
            }
        } finally {
            if (barStarted) {
                bar.stop()
            }
            try {
                if (page) {
                    await page.close()
                }
            } finally {
                await puppeteerSg.close()
            }
        }
    }
}

export const everandDownloader = new EverandDownloader()