rkwyu
committed on
Commit
·
58293b7
1
Parent(s):
3a25a20
Support everand podcast
Browse files- README.md +21 -7
- run.js +1 -1
- src/App.js +4 -0
- src/const/EverandRegex.js +6 -0
- src/service/EverandDownloader.js +112 -0
README.md
CHANGED
@@ -5,7 +5,9 @@
|
|
5 |
</a>
|
6 |
|
7 |
## About ##
|
8 |
-
Scribd-dl helps downloading
|
|
|
|
|
9 |
|
10 |
## Prerequisites ##
|
11 |
To use Scribd-dl, you need to install [Node.js](https://nodejs.org/en/download/). It is recommended that you use the latest LTS version available.
|
@@ -39,40 +41,52 @@ rendertime=100
|
|
39 |
[DIRECTORY]
|
40 |
output=output
|
41 |
```
|
42 |
-
`rendertime` is the waiting time in millisecond for single page rendering
|
43 |
`output` is the output directory for generated .pdf files.
|
44 |
|
45 |
## Usage (CLI) ##
|
46 |
```console
|
47 |
Usage: npm start [options] url
|
48 |
Options:
|
49 |
-
/i image-based: generated by image snapshots taken for pages
|
50 |
```
|
51 |
|
52 |
-
#### Example 1: Download 《The Minds of Billy Milligan》 ####
|
53 |
```console
|
54 |
npm start "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
|
55 |
```
|
56 |
|
57 |
-
#### Example 2: Download 《The Minds of Billy Milligan》 using `image-based` method ####
|
58 |
```console
|
59 |
npm start /i "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
|
60 |
```
|
61 |
|
62 |
-
#### Example 3: Download 《Everything You Need To Know About ChatGPT》 ####
|
63 |
```console
|
64 |
npm start "https://www.slideshare.net/slideshow/everything-you-need-to-know-about-chatgpt-8ba3/266783915"
|
65 |
```
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
## Support URL Format ##
|
68 |
- https://www.scribd.com/doc/**
|
69 |
- https://www.scribd.com/embeds/**
|
70 |
- https://www.slideshare.net/**
|
71 |
- https://www.slideshare.net/slideshow/**
|
|
|
|
|
|
|
72 |
|
73 |
## Development Plan ##
|
74 |
|
75 |
-
- Support [everand.com](https://www.everand.com/)
|
76 |
- Scribd obfuscates the .pdf files, so text copied from the documents may come out garbled. De-obfuscation is planned for a future release.
|
77 |
|
78 |
## License ##
|
|
|
5 |
</a>
|
6 |
|
7 |
## About ##
|
8 |
+
Scribd-dl helps you download:
|
9 |
+
- documents on [scribd.com](https://www.scribd.com/) and [slideshare.net](https://www.slideshare.net/) without membership / sign-in
|
10 |
+
- podcast audio from [everand.com](https://www.everand.com/podcasts)
|
11 |
|
12 |
## Prerequisites ##
|
13 |
To use Scribd-dl, you need to install [Node.js](https://nodejs.org/en/download/). It is recommended that you use the latest LTS version available.
|
|
|
41 |
[DIRECTORY]
|
42 |
output=output
|
43 |
```
|
44 |
+
`rendertime` is the waiting time in milliseconds for rendering a single page on [scribd.com](https://www.scribd.com/); it applies only to the `default` mode.
|
45 |
`output` is the output directory for generated .pdf files.
|
46 |
|
47 |
## Usage (CLI) ##
|
48 |
```console
|
49 |
Usage: npm start [options] url
|
50 |
Options:
|
51 |
+
/i image-based: generated by image snapshots taken for pages on scribd.com
|
52 |
```
|
53 |
|
54 |
+
#### Example 1: Download 《The Minds of Billy Milligan》 on scribd.com ####
|
55 |
```console
|
56 |
npm start "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
|
57 |
```
|
58 |
|
59 |
+
#### Example 2: Download 《The Minds of Billy Milligan》 using `image-based` method on scribd.com ####
|
60 |
```console
|
61 |
npm start /i "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
|
62 |
```
|
63 |
|
64 |
+
#### Example 3: Download 《Everything You Need To Know About ChatGPT》 on slideshare.net ####
|
65 |
```console
|
66 |
npm start "https://www.slideshare.net/slideshow/everything-you-need-to-know-about-chatgpt-8ba3/266783915"
|
67 |
```
|
68 |
|
69 |
+
#### Example 4: Download all 《TED Talks Daily》 episodes on everand.com ####
|
70 |
+
```console
|
71 |
+
npm start "https://www.everand.com/podcast-show/414106971/TED-Talks-Daily"
|
72 |
+
```
|
73 |
+
|
74 |
+
#### Example 5: Download 《Sunday Pick: How to care for the people who take care of us (w/ Ai-jen Poo)》 on everand.com ####
|
75 |
+
```console
|
76 |
+
npm start "https://www.everand.com/listen/podcast/731670963"
|
77 |
+
```
|
78 |
+
|
79 |
## Support URL Format ##
|
80 |
- https://www.scribd.com/doc/**
|
81 |
- https://www.scribd.com/embeds/**
|
82 |
- https://www.slideshare.net/**
|
83 |
- https://www.slideshare.net/slideshow/**
|
84 |
+
- https://www.everand.com/podcast-show/**
|
85 |
+
- https://www.everand.com/podcast/**
|
86 |
+
- https://www.everand.com/listen/podcast/**
|
87 |
|
88 |
## Development Plan ##
|
89 |
|
|
|
90 |
- Scribd obfuscates the .pdf files, so text copied from the documents may come out garbled. De-obfuscation is planned for a future release.
|
91 |
|
92 |
## License ##
|
run.js
CHANGED
@@ -18,6 +18,6 @@ if (process.argv.length >= 3) {
|
|
18 |
console.error(`
|
19 |
Usage: npm start [options] url
|
20 |
Options:
|
21 |
-
/i image-based: generated by image snapshots taken for pages
|
22 |
`)
|
23 |
}
|
|
|
18 |
console.error(`
|
19 |
Usage: npm start [options] url
|
20 |
Options:
|
21 |
+
/i image-based: generated by image snapshots taken for pages on scribd.com
|
22 |
`)
|
23 |
}
|
src/App.js
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
import { scribdDownloader } from "./service/ScribdDownloader.js"
|
2 |
import { slideshareDownloader } from "./service/SlideshareDownloader.js"
|
|
|
3 |
import * as scribdRegex from "./const/ScribdRegex.js"
|
4 |
import * as slideshareRegex from "./const/SlideshareRegex.js"
|
|
|
5 |
|
6 |
class App {
|
7 |
constructor() {
|
@@ -16,6 +18,8 @@ class App {
|
|
16 |
await scribdDownloader.execute(url, flag)
|
17 |
} else if (url.match(slideshareRegex.DOMAIN)) {
|
18 |
await slideshareDownloader.execute(url)
|
|
|
|
|
19 |
} else {
|
20 |
throw new Error(`Unsupported URL: ${url}`)
|
21 |
}
|
|
|
1 |
import { scribdDownloader } from "./service/ScribdDownloader.js"
|
2 |
import { slideshareDownloader } from "./service/SlideshareDownloader.js"
|
3 |
+
import { everandDownloader } from "./service/EverandDownloader.js"
|
4 |
import * as scribdRegex from "./const/ScribdRegex.js"
|
5 |
import * as slideshareRegex from "./const/SlideshareRegex.js"
|
6 |
+
import * as everandRegex from "./const/EverandRegex.js"
|
7 |
|
8 |
class App {
|
9 |
constructor() {
|
|
|
18 |
await scribdDownloader.execute(url, flag)
|
19 |
} else if (url.match(slideshareRegex.DOMAIN)) {
|
20 |
await slideshareDownloader.execute(url)
|
21 |
+
} else if (url.match(everandRegex.DOMAIN)) {
|
22 |
+
await everandDownloader.execute(url)
|
23 |
} else {
|
24 |
throw new Error(`Unsupported URL: ${url}`)
|
25 |
}
|
src/const/EverandRegex.js
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// URL patterns for everand.com. None of them use the `g`/`y` flags, so
// `.test()` / `.exec()` can be called repeatedly without resetting `lastIndex`.

// Any URL on the www.everand.com host.
const DOMAIN = /^https:\/\/www\.everand\.com/

// Podcast series page. Groups: (1) numeric series id, (2) URL slug.
// The slug class is `A-Z`, not `A-z`: the latter is an ASCII range that also
// matches the punctuation between "Z" and "a" ([ \ ] ^ `).
const PODCAST_SERIES = /^https:\/\/www\.everand\.com\/podcast-show\/([0-9]+)\/([a-zA-Z0-9_-]+)/

// Podcast episode page. Groups: (1) numeric episode id, (2) URL slug.
const PODCAST_EPISODE = /^https:\/\/www\.everand\.com\/podcast\/([0-9]+)\/([a-zA-Z0-9_-]+)/

// Podcast player ("listen") page. Group: (1) numeric episode id.
const PODCAST_LISTEN = /^https:\/\/www\.everand\.com\/listen\/podcast\/([0-9]+)/

export { DOMAIN, PODCAST_SERIES, PODCAST_EPISODE, PODCAST_LISTEN }
|
src/service/EverandDownloader.js
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cliProgress from "cli-progress"
|
2 |
+
import { puppeteerSg } from "../utils/request/PuppeteerSg.js";
|
3 |
+
import { pdfGenerator } from "../utils/io/PdfGenerator.js";
|
4 |
+
import { configLoader } from "../utils/io/ConfigLoader.js";
|
5 |
+
import { directoryIo } from "../utils/io/DirectoryIo.js"
|
6 |
+
import * as everandRegex from "../const/EverandRegex.js"
|
7 |
+
import { Image } from "../object/Image.js"
|
8 |
+
import sharp from "sharp";
|
9 |
+
import axios from "axios";
|
10 |
+
import fs from "fs"
|
11 |
+
|
12 |
+
|
13 |
+
// Root directory for downloaded files, loaded through configLoader with
// section "DIRECTORY" and key "output".
const output = configLoader.load("DIRECTORY", "output")
|
14 |
+
|
15 |
+
/**
 * Make a podcast title usable as a single file name.
 *
 * Path separators and characters rejected by common filesystems
 * (\ / : * ? " < > |) are replaced with "_" so the title cannot introduce
 * extra path segments or make the write fail.
 *
 * @param {string} name - raw title
 * @returns {string} the title with unsafe characters replaced
 */
function toSafeFileName(name) {
    return String(name).replace(/[\\/:*?"<>|]/g, "_").trim()
}

/**
 * Downloads podcast audio from everand.com.
 *
 * The constructor always returns the first instance created, so the class
 * behaves as a singleton.
 */
class EverandDownloader {
    constructor() {
        if (!EverandDownloader.instance) {
            EverandDownloader.instance = this
        }
        return EverandDownloader.instance
    }

    /**
     * Dispatch a URL to the matching download routine.
     *
     * @param {string} url - series, episode or listen URL on everand.com
     * @throws {Error} when the URL matches none of the supported patterns
     */
    async execute(url) {
        if (everandRegex.PODCAST_SERIES.test(url)) {
            await this.series(url)
            return
        }

        // An episode page is downloaded through its player ("listen") page.
        const episode = everandRegex.PODCAST_EPISODE.exec(url)
        if (episode) {
            await this.listen(`https://www.everand.com/listen/podcast/${episode[1]}`)
            return
        }

        if (everandRegex.PODCAST_LISTEN.test(url)) {
            await this.listen(url)
            return
        }

        throw new Error(`Unsupported URL: ${url}`)
    }

    /**
     * Download one episode from its listen page into `<output>/<seriesId>/`.
     *
     * @param {string} url - listen URL of the episode
     * @param {boolean} [isEpisode=true] - true when called for a standalone
     *   episode: a progress bar is shown and the browser is closed afterwards.
     *   `series()` passes false because it owns the progress bar and browser.
     * @throws {Error} when the URL is not a listen URL or the series link on
     *   the page cannot be parsed
     */
    async listen(url, isEpisode = true) {
        const listenMatch = everandRegex.PODCAST_LISTEN.exec(url)
        if (!listenMatch) {
            throw new Error(`Unsupported URL: ${url}`)
        }
        const episodeId = listenMatch[1]

        let page
        let bar = null
        try {
            // navigate to everand
            page = await puppeteerSg.getPage(url)

            // wait rendering
            await new Promise(resolve => setTimeout(resolve, 1000))

            // get title, audio-url, series-url
            // The callbacks run inside the page, where the site defines the
            // global `Scribd`; no eval is needed to reach it.
            const title = await page.evaluate(() => Scribd.current_doc.short_title)
            const audioUrl = await page.evaluate(() => document.querySelector('audio#audioplayer').src)
            const seriesUrl = await page.evaluate(() => document.querySelector('a[href^="https://www.everand.com/podcast-show/"]').href)

            // prepare output dir
            const seriesMatch = everandRegex.PODCAST_SERIES.exec(seriesUrl)
            if (!seriesMatch) {
                throw new Error(`Unable to resolve podcast series from: ${seriesUrl}`)
            }
            const dir = `${output}/${seriesMatch[1]}`
            await directoryIo.create(dir)

            // download audio
            if (isEpisode) {
                bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
                bar.start(1, 0)
            }
            const path = `${dir}/${episodeId}_${toSafeFileName(title)}.mp3`
            const resp = await axios.get(audioUrl, { responseType: 'stream' })

            // Wait until the file is fully flushed; otherwise the browser is
            // closed and progress reported while the download is still running.
            await new Promise((resolve, reject) => {
                const file = fs.createWriteStream(path)
                resp.data.on("error", reject)
                file.on("error", reject)
                file.on("finish", resolve)
                resp.data.pipe(file)
            })

            if (bar) {
                bar.update(1)
            }
        } finally {
            // Release the terminal, page and browser even when a step failed.
            if (bar) {
                bar.stop()
            }
            if (page) {
                await page.close()
            }
            if (isEpisode) {
                await puppeteerSg.close()
            }
        }
    }

    /**
     * Download every episode of a podcast series, page by page.
     *
     * @param {string} url - series URL (`/podcast-show/<id>/<slug>`)
     * @throws {Error} when the URL is not a series URL
     */
    async series(url) {
        const seriesMatch = everandRegex.PODCAST_SERIES.exec(url)
        if (!seriesMatch) {
            throw new Error(`Unsupported URL: ${url}`)
        }
        // Matched prefix only, so an existing query string or fragment on the
        // input does not corrupt the `?page=` URLs built below.
        const baseUrl = seriesMatch[0]

        const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
        let barStarted = false
        let page
        try {
            // navigate to everand
            page = await puppeteerSg.getPage(url)

            // wait rendering
            await new Promise(resolve => setTimeout(resolve, 1000))

            // get number-of-episodes
            const totalEpisode = await page.evaluate(() => parseInt(document.querySelector('span[data-e2e="podcast-series-header-total-episodes"]').textContent.replace("episodes", "").trim(), 10))

            // get pages; fall back to a single page when no pagination links exist
            const totalPage = await page.evaluate(() => {
                const links = [...document.querySelectorAll('div[data-e2e="pagination"] a[aria-label^="Page"]')]
                const last = links.at(-1)
                const count = last ? parseInt(last.textContent, 10) : 1
                return Number.isNaN(count) ? 1 : count
            })

            bar.start(totalEpisode, 0)
            barStarted = true

            // Count finished downloads instead of assuming a fixed page size.
            let downloaded = 0
            for (let i = 1; i <= totalPage; i++) {
                await page.goto(`${baseUrl}?page=${i}&sort=desc`, { waitUntil: "load" })
                await new Promise(resolve => setTimeout(resolve, 1000))

                const episodes = await page.evaluate(() => [...document.querySelectorAll('div.breakpoint_hide.below a[data-e2e="podcast-episode-player-button"]')].map(x => x.href))
                for (const episode of episodes) {
                    await this.listen(episode, false)
                    downloaded += 1
                    bar.update(downloaded)
                }
            }
        } finally {
            // Release the terminal, page and browser even when a step failed.
            if (barStarted) {
                bar.stop()
            }
            if (page) {
                await page.close()
            }
            await puppeteerSg.close()
        }
    }
}
|
111 |
+
|
112 |
+
export const everandDownloader = new EverandDownloader()
|