File size: 3,344 Bytes
8e7c043
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a25a20
8e7c043
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import cliProgress from "cli-progress"
import { puppeteerSg } from "../utils/request/PuppeteerSg.js";
import { pdfGenerator } from "../utils/io/PdfGenerator.js";
import { configLoader } from "../utils/io/ConfigLoader.js";
import { directoryIo } from "../utils/io/DirectoryIo.js"
import * as slideshareRegex from "../const/SlideshareRegex.js"
import { Image } from "../object/Image.js"
import sharp from "sharp";
import axios from "axios";
import fs from "fs"


// Base output directory, loaded from the "DIRECTORY" / "output" configuration entry.
// Temporary slide images are written to `${output}/<id>` and the final PDF to `${output}/<id>.pdf`.
const output = configLoader.load("DIRECTORY", "output")

/**
 * Downloads a SlideShare deck slide by slide and bundles the slides into one PDF.
 *
 * The constructor implements a singleton: every `new SlideshareDownloader()`
 * returns the first instance that was created.
 */
class SlideshareDownloader {
    constructor() {
        if (!SlideshareDownloader.instance) {
            SlideshareDownloader.instance = this
        }
        return SlideshareDownloader.instance
    }

    /**
     * Dispatch a URL to the downloader when it matches a supported pattern.
     *
     * @param {string} url - SlideShare URL to download.
     * @returns {Promise<void>}
     * @throws {Error} When the URL matches neither the SLIDESHOW nor the PPT pattern.
     */
    async execute(url) {
        for (const pattern of [slideshareRegex.SLIDESHOW, slideshareRegex.PPT]) {
            // `match` is used for the test and `exec` for the capture, as before,
            // so the behaviour is the same whether or not the pattern is global.
            if (url.match(pattern)) {
                await this.slideshow(url, pattern.exec(url)[1])
                return
            }
        }
        throw new Error(`Unsupported URL: ${url}`)
    }

    /**
     * Download every slide of a deck as PNG and merge them into `${output}/${id}.pdf`.
     *
     * The temp dir, the page and the browser are released whether or not the
     * download succeeds, so a failure does not leave a browser process or
     * partial files behind.
     *
     * @param {string} url - URL of the deck page.
     * @param {string} id - Identifier captured from the URL; names the temp dir and the PDF.
     * @returns {Promise<void>}
     * @throws {Error} When the page lacks the expected elements, or a download/conversion step fails.
     */
    async slideshow(url, id) {
        // prepare temp dir
        const dir = `${output}/${id}`
        await directoryIo.create(dir)

        let page = null
        let failed = false
        let failure
        try {
            // navigate to slideshare
            page = await puppeteerSg.getPage(url)

            // wait rendering
            await new Promise(resolve => setTimeout(resolve, 1000))

            const pageNumber = await readPageNumber(page)
            const source = await findImageSource(page)
            const images = await downloadImages(dir, pageNumber, source)

            // generate pdf
            await pdfGenerator.generate(images, `${output}/${id}.pdf`)
        } catch (err) {
            failed = true
            failure = err
        }

        // Every cleanup step is attempted even if an earlier one fails. The first
        // error wins, so a cleanup problem never hides the original failure.
        const cleanups = [
            () => directoryIo.remove(dir),
            () => (page ? page.close() : undefined),
            () => puppeteerSg.close(),
        ]
        for (const cleanup of cleanups) {
            try {
                await cleanup()
            } catch (err) {
                if (!failed) {
                    failed = true
                    failure = err
                }
            }
        }
        if (failed) {
            throw failure
        }
    }
}

/**
 * Pick the highest-resolution candidate out of a srcset string.
 *
 * @param {string} srcset - The `srcset` attribute of a slide image.
 * @param {RegExp} pattern - Pattern iterated with `exec`; group 1 is the URL prefix,
 *     group 3 the URL suffix and group 4 the resolution.
 * @returns {{prefix: string, suffix: string, resolution: number}}
 * @throws {Error} When no candidate with a numeric resolution is found.
 */
function pickHighestResolution(srcset, pattern) {
    let prefix = ""
    let suffix = ""
    let resolution = -1

    // The pattern object is shared; start from the beginning regardless of
    // whatever state a previous scan left behind.
    pattern.lastIndex = 0
    let matches
    while ((matches = pattern.exec(srcset)) !== null) {
        const candidate = parseInt(matches[4], 10)
        if (resolution < candidate) {
            prefix = matches[1]
            suffix = matches[3]
            resolution = candidate
        }
    }

    if (resolution < 0) {
        throw new Error(`Unable to determine the slide image resolution from srcset: ${srcset}`)
    }
    return { prefix, suffix, resolution }
}

/**
 * Read the total number of slides from the "<current> of <total>" counter.
 *
 * @param {object} page - Page object returned by `puppeteerSg.getPage`.
 * @returns {Promise<number>}
 * @throws {Error} When the counter element is missing or its text has no parsable total.
 */
async function readPageNumber(page) {
    const span = await page.$("span[data-cy='page-number']")
    if (!span) {
        throw new Error("Unable to locate the slide counter (span[data-cy='page-number'])")
    }
    const text = await span.evaluate((el) => el.textContent)
    const pageNumber = parseInt(String(text).split("of")[1], 10)
    if (Number.isNaN(pageNumber)) {
        throw new Error(`Unable to parse the number of slides from: ${text}`)
    }
    return pageNumber
}

/**
 * Work out the URL parts of the highest-resolution image from the first slide.
 *
 * @param {object} page - Page object returned by `puppeteerSg.getPage`.
 * @returns {Promise<{prefix: string, suffix: string, resolution: number}>}
 * @throws {Error} When the first slide image is missing or its srcset has no usable candidate.
 */
async function findImageSource(page) {
    const image0 = await page.$("img#slide-image-0")
    if (!image0) {
        throw new Error("Unable to locate the first slide image (img#slide-image-0)")
    }
    const srcset0 = await image0.evaluate((el) => el["srcset"])
    return pickHighestResolution(srcset0, slideshareRegex.CDN)
}

/**
 * Download the slides one at a time, store each as PNG in `dir` and describe it as an `Image`.
 *
 * @param {string} dir - Directory receiving the PNG files (`0001.png`, `0002.png`, ...).
 * @param {number} pageNumber - Number of slides to fetch.
 * @param {{prefix: string, suffix: string, resolution: number}} source - URL parts of the slide images.
 * @returns {Promise<Image[]>} One entry per slide, in slide order.
 */
async function downloadImages(dir, pageNumber, source) {
    const images = []
    const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
    bar.start(pageNumber, 0)
    try {
        for (let i = 1; i <= pageNumber; i++) {
            const path = `${dir}/${String(i).padStart(4, "0")}.png`

            // convert the webp (even it shows jpg) to png
            const resp = await axios.get(
                `${source.prefix}${i}-${source.resolution}${source.suffix}`,
                { responseType: 'arraybuffer' }
            )
            const png = await sharp(resp.data).toFormat('png').toBuffer()
            await fs.promises.writeFile(path, png)

            // Dimensions come from the in-memory PNG, so the file is not read back.
            const metadata = await sharp(png).metadata()
            images.push(new Image(
                path,
                metadata.width,
                metadata.height
            ))
            bar.update(i)
        }
    } finally {
        // Always stop the bar so a failure does not leave it running.
        bar.stop()
    }
    return images
}

// Shared downloader instance for importers; the class constructor returns the same
// object on every `new`, so this is the one instance used throughout the process.
export const slideshareDownloader = new SlideshareDownloader()