rkwyu
committed on
Commit
·
8e7c043
1
Parent(s):
22529c1
Add support to slideshare.net
Browse files- README.md +20 -10
- package-lock.json +113 -27
- package.json +1 -0
- run.js +0 -1
- src/App.js +4 -0
- src/const/SlideshareRegex.js +6 -0
- src/service/SlideshareDownloader.js +99 -0
README.md
CHANGED
@@ -2,11 +2,11 @@
|
|
2 |
[](https://www.gnu.org/licenses/gpl-3.0)
|
3 |
|
4 |
## About ##
|
5 |
-
Scribd-dl helps downloading documents on [scribd.com](https://www.scribd.com/) without membership / sign-in.
|
6 |
|
7 |
## Development Plan ##
|
8 |
-
Scribd obfuscates the .pdf files, the texts copied from the documents
|
9 |
-
De-obfuscating will be the next stage.
|
10 |
|
11 |
## Prerequisites ##
|
12 |
Please make sure the following tool(s) / application(s) are properly setup and ready to use:
|
@@ -24,6 +24,7 @@ npm install
|
|
24 |
```
|
25 |
|
26 |
## Configuration ##
|
|
|
27 |
```ini
|
28 |
[SCRIBD]
|
29 |
rendertime=100
|
@@ -31,27 +32,36 @@ rendertime=100
|
|
31 |
[DIRECTORY]
|
32 |
output=output
|
33 |
```
|
34 |
-
|
35 |
-
`rendertime` is the waiting time in millisecond for single page rendering, it is only applicable for `default` mode. (too short might cause missing images)
|
36 |
`output` is the output directory for generated .pdf files.
|
37 |
|
38 |
## Usage (CLI) ##
|
39 |
```console
|
40 |
Usage: npm start [options] url
|
41 |
Options:
|
42 |
-
/
|
43 |
-
/i image-based: generated by image snapshots taken for pages
|
44 |
```
|
45 |
|
46 |
#### Example 1: Download 《The Minds of Billy Milligan》 ####
|
47 |
```console
|
48 |
-
npm start https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes
|
|
|
|
|
|
|
|
|
|
|
49 |
```
|
50 |
|
51 |
-
#### Example
|
52 |
```console
|
53 |
-
npm start
|
54 |
```
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
## License ##
|
57 |
[GNU GPL v3.0](LICENSE.md)
|
|
|
2 |
[](https://www.gnu.org/licenses/gpl-3.0)
|
3 |
|
4 |
## About ##
|
5 |
+
Scribd-dl helps download documents from [scribd.com](https://www.scribd.com/) and [slideshare.net](https://www.slideshare.net/) without a membership or sign-in.
|
6 |
|
7 |
## Development Plan ##
|
8 |
+
Scribd obfuscates the .pdf files, so text copied from the documents may come out garbled.
|
9 |
+
De-obfuscating will be the next stage of this project.
|
10 |
|
11 |
## Prerequisites ##
|
12 |
Please make sure the following tool(s) / application(s) are properly setup and ready to use:
|
|
|
24 |
```
|
25 |
|
26 |
## Configuration ##
|
27 |
+
Configuration can be altered in `config.ini`.
|
28 |
```ini
|
29 |
[SCRIBD]
|
30 |
rendertime=100
|
|
|
32 |
[DIRECTORY]
|
33 |
output=output
|
34 |
```
|
35 |
+
`rendertime` is the waiting time in milliseconds for rendering a single page on [scribd.com](https://www.scribd.com/); it is only applicable to `default` mode.
|
|
|
36 |
`output` is the output directory for generated .pdf files.
|
37 |
|
38 |
## Usage (CLI) ##
|
39 |
```console
|
40 |
Usage: npm start [options] url
|
41 |
Options:
|
42 |
+
/i image-based: generated by image snapshots taken for pages in scribd.com
|
|
|
43 |
```
|
44 |
|
45 |
#### Example 1: Download 《The Minds of Billy Milligan》 ####
|
46 |
```console
|
47 |
+
npm start "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
|
48 |
+
```
|
49 |
+
|
50 |
+
#### Example 2: Download 《The Minds of Billy Milligan》 using `image-based` method ####
|
51 |
+
```console
|
52 |
+
npm start /i "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
|
53 |
```
|
54 |
|
55 |
+
#### Example 3: Download 《Everything You Need To Know About ChatGPT》 ####
|
56 |
```console
|
57 |
+
npm start "https://www.slideshare.net/slideshow/everything-you-need-to-know-about-chatgpt-8ba3/266783915"
|
58 |
```
|
59 |
|
60 |
+
## Supported URL Formats ##
|
61 |
+
- https://www.scribd.com/doc/**
|
62 |
+
- https://www.scribd.com/embeds/**
|
63 |
+
- https://www.slideshare.net/**
|
64 |
+
- https://www.slideshare.net/slideshow/**
|
65 |
+
|
66 |
## License ##
|
67 |
[GNU GPL v3.0](LICENSE.md)
|
package-lock.json
CHANGED
@@ -9,6 +9,7 @@
|
|
9 |
"version": "1.0.0",
|
10 |
"license": "GPL-3.0-or-later",
|
11 |
"dependencies": {
|
|
|
12 |
"cli-progress": "^3.12.0",
|
13 |
"ini": "^4.1.2",
|
14 |
"pdfkit": "^0.15.0",
|
@@ -1986,14 +1987,6 @@
|
|
1986 |
"@sinonjs/commons": "^3.0.0"
|
1987 |
}
|
1988 |
},
|
1989 |
-
"node_modules/@swc/helpers": {
|
1990 |
-
"version": "0.3.17",
|
1991 |
-
"resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.3.17.tgz",
|
1992 |
-
"integrity": "sha512-tb7Iu+oZ+zWJZ3HJqwx8oNwSDIU440hmVMDPhpACWQWnrZHK99Bxs70gT1L2dnr5Hg50ZRWEFkQCAnOVVV0z1Q==",
|
1993 |
-
"dependencies": {
|
1994 |
-
"tslib": "^2.4.0"
|
1995 |
-
}
|
1996 |
-
},
|
1997 |
"node_modules/@types/babel__core": {
|
1998 |
"version": "7.20.5",
|
1999 |
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
|
@@ -2222,6 +2215,11 @@
|
|
2222 |
"url": "https://github.com/sponsors/ljharb"
|
2223 |
}
|
2224 |
},
|
|
|
|
|
|
|
|
|
|
|
2225 |
"node_modules/available-typed-arrays": {
|
2226 |
"version": "1.0.7",
|
2227 |
"resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz",
|
@@ -2236,6 +2234,16 @@
|
|
2236 |
"url": "https://github.com/sponsors/ljharb"
|
2237 |
}
|
2238 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2239 |
"node_modules/babel-jest": {
|
2240 |
"version": "29.7.0",
|
2241 |
"resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz",
|
@@ -2777,6 +2785,17 @@
|
|
2777 |
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
|
2778 |
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
|
2779 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2780 |
"node_modules/concat-map": {
|
2781 |
"version": "0.0.1",
|
2782 |
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
@@ -3077,6 +3096,14 @@
|
|
3077 |
"url": "https://github.com/sponsors/ljharb"
|
3078 |
}
|
3079 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3080 |
"node_modules/detect-libc": {
|
3081 |
"version": "2.0.3",
|
3082 |
"resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz",
|
@@ -3664,20 +3691,23 @@
|
|
3664 |
"integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==",
|
3665 |
"dev": true
|
3666 |
},
|
3667 |
-
"node_modules/
|
3668 |
-
"version": "1.
|
3669 |
-
"resolved": "https://registry.npmjs.org/
|
3670 |
-
"integrity": "sha512-
|
3671 |
-
"
|
3672 |
-
|
3673 |
-
|
3674 |
-
|
3675 |
-
|
3676 |
-
|
3677 |
-
|
3678 |
-
"
|
3679 |
-
|
3680 |
-
|
|
|
|
|
|
|
3681 |
}
|
3682 |
},
|
3683 |
"node_modules/for-each": {
|
@@ -3688,6 +3718,19 @@
|
|
3688 |
"is-callable": "^1.1.3"
|
3689 |
}
|
3690 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3691 |
"node_modules/fs-constants": {
|
3692 |
"version": "1.0.0",
|
3693 |
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
|
@@ -6115,6 +6158,25 @@
|
|
6115 |
"node": ">=8.6"
|
6116 |
}
|
6117 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6118 |
"node_modules/mimic-fn": {
|
6119 |
"version": "2.1.0",
|
6120 |
"resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz",
|
@@ -6434,6 +6496,35 @@
|
|
6434 |
"png-js": "^1.0.0"
|
6435 |
}
|
6436 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6437 |
"node_modules/pend": {
|
6438 |
"version": "1.2.0",
|
6439 |
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
|
@@ -6748,11 +6839,6 @@
|
|
6748 |
"node": ">=10"
|
6749 |
}
|
6750 |
},
|
6751 |
-
"node_modules/restructure": {
|
6752 |
-
"version": "2.0.1",
|
6753 |
-
"resolved": "https://registry.npmjs.org/restructure/-/restructure-2.0.1.tgz",
|
6754 |
-
"integrity": "sha512-e0dOpjm5DseomnXx2M5lpdZ5zoHqF1+bqdMJUohoYVVQa7cBdnk7fdmeI6byNWP/kiME72EeTiSypTCVnpLiDg=="
|
6755 |
-
},
|
6756 |
"node_modules/reusify": {
|
6757 |
"version": "1.0.4",
|
6758 |
"resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz",
|
|
|
9 |
"version": "1.0.0",
|
10 |
"license": "GPL-3.0-or-later",
|
11 |
"dependencies": {
|
12 |
+
"axios": "^1.6.8",
|
13 |
"cli-progress": "^3.12.0",
|
14 |
"ini": "^4.1.2",
|
15 |
"pdfkit": "^0.15.0",
|
|
|
1987 |
"@sinonjs/commons": "^3.0.0"
|
1988 |
}
|
1989 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1990 |
"node_modules/@types/babel__core": {
|
1991 |
"version": "7.20.5",
|
1992 |
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
|
|
|
2215 |
"url": "https://github.com/sponsors/ljharb"
|
2216 |
}
|
2217 |
},
|
2218 |
+
"node_modules/asynckit": {
|
2219 |
+
"version": "0.4.0",
|
2220 |
+
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
2221 |
+
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
2222 |
+
},
|
2223 |
"node_modules/available-typed-arrays": {
|
2224 |
"version": "1.0.7",
|
2225 |
"resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz",
|
|
|
2234 |
"url": "https://github.com/sponsors/ljharb"
|
2235 |
}
|
2236 |
},
|
2237 |
+
"node_modules/axios": {
|
2238 |
+
"version": "1.6.8",
|
2239 |
+
"resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz",
|
2240 |
+
"integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==",
|
2241 |
+
"dependencies": {
|
2242 |
+
"follow-redirects": "^1.15.6",
|
2243 |
+
"form-data": "^4.0.0",
|
2244 |
+
"proxy-from-env": "^1.1.0"
|
2245 |
+
}
|
2246 |
+
},
|
2247 |
"node_modules/babel-jest": {
|
2248 |
"version": "29.7.0",
|
2249 |
"resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz",
|
|
|
2785 |
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
|
2786 |
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
|
2787 |
},
|
2788 |
+
"node_modules/combined-stream": {
|
2789 |
+
"version": "1.0.8",
|
2790 |
+
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
2791 |
+
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
2792 |
+
"dependencies": {
|
2793 |
+
"delayed-stream": "~1.0.0"
|
2794 |
+
},
|
2795 |
+
"engines": {
|
2796 |
+
"node": ">= 0.8"
|
2797 |
+
}
|
2798 |
+
},
|
2799 |
"node_modules/concat-map": {
|
2800 |
"version": "0.0.1",
|
2801 |
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
|
|
3096 |
"url": "https://github.com/sponsors/ljharb"
|
3097 |
}
|
3098 |
},
|
3099 |
+
"node_modules/delayed-stream": {
|
3100 |
+
"version": "1.0.0",
|
3101 |
+
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
3102 |
+
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
3103 |
+
"engines": {
|
3104 |
+
"node": ">=0.4.0"
|
3105 |
+
}
|
3106 |
+
},
|
3107 |
"node_modules/detect-libc": {
|
3108 |
"version": "2.0.3",
|
3109 |
"resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz",
|
|
|
3691 |
"integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==",
|
3692 |
"dev": true
|
3693 |
},
|
3694 |
+
"node_modules/follow-redirects": {
|
3695 |
+
"version": "1.15.6",
|
3696 |
+
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
3697 |
+
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
|
3698 |
+
"funding": [
|
3699 |
+
{
|
3700 |
+
"type": "individual",
|
3701 |
+
"url": "https://github.com/sponsors/RubenVerborgh"
|
3702 |
+
}
|
3703 |
+
],
|
3704 |
+
"engines": {
|
3705 |
+
"node": ">=4.0"
|
3706 |
+
},
|
3707 |
+
"peerDependenciesMeta": {
|
3708 |
+
"debug": {
|
3709 |
+
"optional": true
|
3710 |
+
}
|
3711 |
}
|
3712 |
},
|
3713 |
"node_modules/for-each": {
|
|
|
3718 |
"is-callable": "^1.1.3"
|
3719 |
}
|
3720 |
},
|
3721 |
+
"node_modules/form-data": {
|
3722 |
+
"version": "4.0.0",
|
3723 |
+
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
3724 |
+
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
3725 |
+
"dependencies": {
|
3726 |
+
"asynckit": "^0.4.0",
|
3727 |
+
"combined-stream": "^1.0.8",
|
3728 |
+
"mime-types": "^2.1.12"
|
3729 |
+
},
|
3730 |
+
"engines": {
|
3731 |
+
"node": ">= 6"
|
3732 |
+
}
|
3733 |
+
},
|
3734 |
"node_modules/fs-constants": {
|
3735 |
"version": "1.0.0",
|
3736 |
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
|
|
|
6158 |
"node": ">=8.6"
|
6159 |
}
|
6160 |
},
|
6161 |
+
"node_modules/mime-db": {
|
6162 |
+
"version": "1.52.0",
|
6163 |
+
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
6164 |
+
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
6165 |
+
"engines": {
|
6166 |
+
"node": ">= 0.6"
|
6167 |
+
}
|
6168 |
+
},
|
6169 |
+
"node_modules/mime-types": {
|
6170 |
+
"version": "2.1.35",
|
6171 |
+
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
6172 |
+
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
6173 |
+
"dependencies": {
|
6174 |
+
"mime-db": "1.52.0"
|
6175 |
+
},
|
6176 |
+
"engines": {
|
6177 |
+
"node": ">= 0.6"
|
6178 |
+
}
|
6179 |
+
},
|
6180 |
"node_modules/mimic-fn": {
|
6181 |
"version": "2.1.0",
|
6182 |
"resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz",
|
|
|
6496 |
"png-js": "^1.0.0"
|
6497 |
}
|
6498 |
},
|
6499 |
+
"node_modules/pdfkit/node_modules/@swc/helpers": {
|
6500 |
+
"version": "0.3.17",
|
6501 |
+
"resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.3.17.tgz",
|
6502 |
+
"integrity": "sha512-tb7Iu+oZ+zWJZ3HJqwx8oNwSDIU440hmVMDPhpACWQWnrZHK99Bxs70gT1L2dnr5Hg50ZRWEFkQCAnOVVV0z1Q==",
|
6503 |
+
"dependencies": {
|
6504 |
+
"tslib": "^2.4.0"
|
6505 |
+
}
|
6506 |
+
},
|
6507 |
+
"node_modules/pdfkit/node_modules/fontkit": {
|
6508 |
+
"version": "1.9.0",
|
6509 |
+
"resolved": "https://registry.npmjs.org/fontkit/-/fontkit-1.9.0.tgz",
|
6510 |
+
"integrity": "sha512-HkW/8Lrk8jl18kzQHvAw9aTHe1cqsyx5sDnxncx652+CIfhawokEPkeM3BoIC+z/Xv7a0yMr0f3pRRwhGH455g==",
|
6511 |
+
"dependencies": {
|
6512 |
+
"@swc/helpers": "^0.3.13",
|
6513 |
+
"brotli": "^1.3.2",
|
6514 |
+
"clone": "^2.1.2",
|
6515 |
+
"deep-equal": "^2.0.5",
|
6516 |
+
"dfa": "^1.2.0",
|
6517 |
+
"restructure": "^2.0.1",
|
6518 |
+
"tiny-inflate": "^1.0.3",
|
6519 |
+
"unicode-properties": "^1.3.1",
|
6520 |
+
"unicode-trie": "^2.0.0"
|
6521 |
+
}
|
6522 |
+
},
|
6523 |
+
"node_modules/pdfkit/node_modules/restructure": {
|
6524 |
+
"version": "2.0.1",
|
6525 |
+
"resolved": "https://registry.npmjs.org/restructure/-/restructure-2.0.1.tgz",
|
6526 |
+
"integrity": "sha512-e0dOpjm5DseomnXx2M5lpdZ5zoHqF1+bqdMJUohoYVVQa7cBdnk7fdmeI6byNWP/kiME72EeTiSypTCVnpLiDg=="
|
6527 |
+
},
|
6528 |
"node_modules/pend": {
|
6529 |
"version": "1.2.0",
|
6530 |
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
|
|
|
6839 |
"node": ">=10"
|
6840 |
}
|
6841 |
},
|
|
|
|
|
|
|
|
|
|
|
6842 |
"node_modules/reusify": {
|
6843 |
"version": "1.0.4",
|
6844 |
"resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz",
|
package.json
CHANGED
@@ -17,6 +17,7 @@
|
|
17 |
},
|
18 |
"license": "GPL-3.0-or-later",
|
19 |
"dependencies": {
|
|
|
20 |
"cli-progress": "^3.12.0",
|
21 |
"ini": "^4.1.2",
|
22 |
"pdfkit": "^0.15.0",
|
|
|
17 |
},
|
18 |
"license": "GPL-3.0-or-later",
|
19 |
"dependencies": {
|
20 |
+
"axios": "^1.6.8",
|
21 |
"cli-progress": "^3.12.0",
|
22 |
"ini": "^4.1.2",
|
23 |
"pdfkit": "^0.15.0",
|
run.js
CHANGED
@@ -18,7 +18,6 @@ if (process.argv.length >= 3) {
|
|
18 |
console.error(`
|
19 |
Usage: npm start [options] url
|
20 |
Options:
|
21 |
-
/d default: generated by chromium's print function
|
22 |
/i image-based: generated by image snapshots taken for pages
|
23 |
`)
|
24 |
}
|
|
|
18 |
console.error(`
|
19 |
Usage: npm start [options] url
|
20 |
Options:
|
|
|
21 |
/i image-based: generated by image snapshots taken for pages
|
22 |
`)
|
23 |
}
|
src/App.js
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import { scribdDownloader } from "./service/ScribdDownloader.js"
|
|
|
2 |
import * as scribdRegex from "./const/ScribdRegex.js"
|
|
|
3 |
|
4 |
class App {
|
5 |
constructor() {
|
@@ -12,6 +14,8 @@ class App {
|
|
12 |
async execute(url, flag) {
|
13 |
if (url.match(scribdRegex.DOMAIN)) {
|
14 |
await scribdDownloader.execute(url, flag)
|
|
|
|
|
15 |
} else {
|
16 |
throw new Error(`Unsupported URL: ${url}`)
|
17 |
}
|
|
|
1 |
import { scribdDownloader } from "./service/ScribdDownloader.js"
|
2 |
+
import { slideshareDownloader } from "./service/SlideshareDownloader.js"
|
3 |
import * as scribdRegex from "./const/ScribdRegex.js"
|
4 |
+
import * as slideshareRegex from "./const/SlideshareRegex.js"
|
5 |
|
6 |
class App {
|
7 |
constructor() {
|
|
|
14 |
async execute(url, flag) {
|
15 |
if (url.match(scribdRegex.DOMAIN)) {
|
16 |
await scribdDownloader.execute(url, flag)
|
17 |
+
} else if (url.match(slideshareRegex.DOMAIN)) {
|
18 |
+
await slideshareDownloader.execute(url, flag)
|
19 |
} else {
|
20 |
throw new Error(`Unsupported URL: ${url}`)
|
21 |
}
|
src/const/SlideshareRegex.js
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// URL patterns used to recognise slideshare.net links and to take apart the
// image URLs listed in a slide's `srcset` attribute.
//
// The character classes deliberately spell out `A-Za-z`: the range `A-z` also
// covers the ASCII punctuation between `Z` and `a` ([ \ ] ^ _ `), which must
// not be accepted inside a path segment.

// Any URL hosted on www.slideshare.net.
const DOMAIN = /^https:\/\/www\.slideshare\.net/

// https://www.slideshare.net/slideshow/<slug>/<numeric id>
//   group 1: slug
//   group 2: numeric id
const SLIDESHOW = /^https:\/\/www\.slideshare\.net\/slideshow\/([A-Za-z0-9_-]+)\/([0-9]+)/

// https://www.slideshare.net/<segment>/<slug>
//   group 1: slug
const PPT = /^https:\/\/www\.slideshare\.net\/[A-Za-z0-9_-]+\/([A-Za-z0-9_-]+)/

// One `srcset` candidate of the form "<prefix><page>-<size><.ext> <width>w".
//   group 1: URL up to and including the dash that precedes the page number
//   group 2: size token embedded in the file name
//   group 3: file extension, including the leading dot
//   group 4: width descriptor (the number before the trailing `w`)
// The `g` flag is required: callers walk every candidate with repeated
// exec() calls. Because the regex is shared and stateful, reset `lastIndex`
// to 0 before starting a new scan.
const CDN = /(https:\/\/image\.slidesharecdn\.com\/[A-Za-z0-9_-]+\/[0-9]+\/[A-Za-z0-9_-]+-)[0-9]+-([0-9]+)(\.[a-zA-Z]+) ([0-9]+)w/g

export { DOMAIN, SLIDESHOW, PPT, CDN }
|
src/service/SlideshareDownloader.js
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cliProgress from "cli-progress"
|
2 |
+
import { puppeteerSg } from "../utils/request/PuppeteerSg.js";
|
3 |
+
import { pdfGenerator } from "../utils/io/PdfGenerator.js";
|
4 |
+
import { configLoader } from "../utils/io/ConfigLoader.js";
|
5 |
+
import { directoryIo } from "../utils/io/DirectoryIo.js"
|
6 |
+
import * as slideshareRegex from "../const/SlideshareRegex.js"
|
7 |
+
import { Image } from "../object/Image.js"
|
8 |
+
import sharp from "sharp";
|
9 |
+
import axios from "axios";
|
10 |
+
import fs from "fs"
|
11 |
+
|
12 |
+
|
13 |
+
// Base output directory, loaded from the "output" key of the "DIRECTORY"
// config section. Both the per-slideshow temp folder and the final .pdf are
// created underneath it.
const output = configLoader.load("DIRECTORY", "output")
|
14 |
+
|
15 |
+
/**
 * Downloads a slideshare.net slideshow as a single PDF by fetching every
 * slide image from the image CDN and handing the images to the PDF generator.
 *
 * The constructor always returns the first instance that was created, so the
 * class behaves as a singleton; use the exported `slideshareDownloader`.
 */
class SlideshareDownloader {
    constructor() {
        if (!SlideshareDownloader.instance) {
            SlideshareDownloader.instance = this
        }
        return SlideshareDownloader.instance
    }

    /**
     * Entry point: extracts the slideshow slug from the URL and downloads it.
     *
     * @param {string} url - slideshare.net URL matching either the SLIDESHOW
     *   or the PPT pattern from SlideshareRegex.
     * @returns {Promise<void>}
     * @throws {Error} if the URL matches neither pattern.
     */
    async execute(url) {
        // SLIDESHOW is tried first because PPT would also match such URLs;
        // in both patterns capture group 1 is the slug used as the file name.
        const match = slideshareRegex.SLIDESHOW.exec(url) || slideshareRegex.PPT.exec(url)
        if (!match) {
            throw new Error(`Unsupported URL: ${url}`)
        }
        await this.slideshow(url, match[1])
    }

    /**
     * Downloads all slides of one slideshow and writes `<output>/<id>.pdf`.
     *
     * @param {string} url - slideshow page to open in the browser.
     * @param {string} id - slug used for the temp folder and the PDF name.
     * @returns {Promise<void>}
     * @throws {Error} if the slide counter or the first slide image cannot be
     *   found on the page, or no image URL can be parsed from its `srcset`.
     */
    async slideshow(url, id) {
        // prepare temp dir
        const dir = `${output}/${id}`
        await directoryIo.create(dir)

        // navigate to slideshare
        const page = await puppeteerSg.getPage(url)

        try {
            // wait rendering
            await new Promise(resolve => setTimeout(resolve, 1000))

            // get the page number: the text after "of" in the counter is the slide count
            const span = await page.$("span[data-cy='page-number']")
            if (!span) {
                throw new Error(`Unable to locate the page counter on ${url}`)
            }
            const counter = await span.evaluate((el) => el.textContent)
            const pageNumber = Number.parseInt(String(counter).split("of")[1], 10)
            if (!Number.isInteger(pageNumber) || pageNumber < 1) {
                throw new Error(`Unable to read the number of slides on ${url}`)
            }

            // get the highest resolution offered for the first slide
            const image0 = await page.$("img#slide-image-0")
            if (!image0) {
                throw new Error(`Unable to locate the first slide image on ${url}`)
            }
            const srcset0 = await image0.evaluate((el) => el["srcset"])
            let prefix = ""
            let suffix = ""
            let resolution = -1
            let matches
            // CDN is a shared /g regex; make sure the scan starts at the beginning
            slideshareRegex.CDN.lastIndex = 0
            while ((matches = slideshareRegex.CDN.exec(srcset0)) !== null) {
                const width = Number.parseInt(matches[4], 10)
                if (resolution < width) {
                    prefix = matches[1]
                    suffix = matches[3]
                    resolution = width
                }
            }
            if (resolution < 0) {
                throw new Error(`Unable to parse slide image URLs on ${url}`)
            }

            // download images
            const images = []
            const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
            bar.start(pageNumber, 0)
            try {
                for (let i = 0; i < pageNumber; i++) {
                    const path = `${dir}/${(i + 1).toString().padStart(4, "0")}.png`

                    // convert the webp (even it shows jpg) to png
                    // the width descriptor doubles as the size token of the file name
                    const resp = await axios.get(
                        `${prefix}${i + 1}-${resolution}${suffix}`,
                        { responseType: 'arraybuffer' }
                    )
                    const imageBuffer = await sharp(resp.data).toFormat('png').toBuffer()
                    fs.writeFileSync(path, imageBuffer)

                    // read the dimensions from the buffer instead of re-opening the file
                    const metadata = await sharp(imageBuffer).metadata()
                    images.push(new Image(
                        path,
                        metadata.width,
                        metadata.height
                    ))
                    bar.update(i + 1)
                }
            } finally {
                // always restore the terminal, even when a download fails
                bar.stop()
            }

            // generate pdf
            await pdfGenerator.generate(images, `${output}/${id}.pdf`)

            // remove temp dir (awaited in case the helper is asynchronous)
            await directoryIo.remove(dir)
        } finally {
            // release the browser even when something above throws
            await page.close()
            await puppeteerSg.close()
        }
    }
}

export const slideshareDownloader = new SlideshareDownloader()
|