// File size: 6,343 Bytes
// f6f0c71 acf17aa 3a25a20 f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 c9bbadf f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 acf17aa f6f0c71 |
// 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import cliProgress from "cli-progress"
import { puppeteerSg } from "../utils/request/PuppeteerSg.js";
import { pdfGenerator } from "../utils/io/PdfGenerator.js";
import { configLoader } from "../utils/io/ConfigLoader.js";
import { directoryIo } from "../utils/io/DirectoryIo.js"
import * as scribdRegex from "../const/ScribdRegex.js"
import * as scribdFlag from '../const/ScribdFlag.js'
import { Image } from "../object/Image.js"
import sharp from "sharp";
import path from 'path'
// Base directory for generated files, read from the DIRECTORY.output config entry.
const output = configLoader.load("DIRECTORY", "output")
// Delay handed to setTimeout after scrolling each page into view (milliseconds),
// read from the SCRIBD.rendertime config entry. Parsed explicitly as base 10.
const rendertime = Number.parseInt(configLoader.load("SCRIBD", "rendertime"), 10)
// CSS selector matching every page container of the embedded document.
const PAGE_SELECTOR = "div.outer_page_container div[id^='outer_page_']";
// Fixed pause after navigation so the embed has time to render (milliseconds).
const INITIAL_RENDER_DELAY_MS = 1000;

/**
 * Resolves after `ms` milliseconds.
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Extracts pixel dimensions from an inline style string such as
 * "width: 600px; height: 800px;".
 * @param {?string} style - Value of the element's `style` attribute (may be null).
 * @returns {?{width: number, height: number}} The parsed size, or null when the
 *   attribute is missing, lacks width/height, or does not yield positive numbers.
 */
function parsePageSize(style) {
    if (typeof style !== "string" || !style.includes("width:") || !style.includes("height:")) {
        return null;
    }
    const width = Number.parseInt(style.split("width:")[1].split("px")[0].trim(), 10);
    const height = Number.parseInt(style.split("height:")[1].split("px")[0].trim(), 10);
    // `!(x > 0)` also rejects NaN, which would otherwise leak into pdf/viewport options.
    if (!(width > 0) || !(height > 0)) {
        return null;
    }
    return { width, height };
}

/**
 * Closes the page and then calls puppeteerSg.close(), even if closing the page fails.
 * @param {object} page - Page object previously returned by puppeteerSg.getPage().
 * @returns {Promise<void>}
 */
async function closeBrowser(page) {
    try {
        await page.close();
    } finally {
        await puppeteerSg.close();
    }
}

/**
 * Downloads a Scribd document as a PDF, either by printing the embed page
 * (default mode) or by stitching per-page screenshots (image mode).
 *
 * The constructor always returns the first instance ever created.
 */
class ScribdDownloader {
    constructor() {
        if (!ScribdDownloader.instance) {
            ScribdDownloader.instance = this;
        }
        return ScribdDownloader.instance;
    }

    /**
     * Entry point: picks the download mode and normalises the URL to its embed form.
     * @param {string} url - A Scribd document URL or embed URL.
     * @param {*} flag - Compared against scribdFlag.IMAGE to select image mode.
     * @returns {Promise<void>}
     * @throws {Error} "Unsupported URL: ..." when the URL matches neither pattern.
     */
    async execute(url, flag) {
        const imageMode = flag === scribdFlag.IMAGE;
        console.log(imageMode ? `Mode: IMAGE` : `Mode: DEFAULT`);
        // Call through `this` so the methods keep their receiver.
        const download = (embedUrl) => (imageMode ? this.embeds_image(embedUrl) : this.embeds_default(embedUrl));

        if (url.match(scribdRegex.DOCUMENT)) {
            const documentId = scribdRegex.DOCUMENT.exec(url)[2];
            await download(`https://www.scribd.com/embeds/${documentId}/content`);
        } else if (url.match(scribdRegex.EMBED)) {
            await download(url);
        } else {
            throw new Error(`Unsupported URL: ${url}`);
        }
    }

    /**
     * Default mode: scrolls through every page so it renders, then prints the
     * document container to `${output}/${id}.pdf`.
     * @param {string} url - Scribd embed URL.
     * @returns {Promise<void>}
     * @throws {Error} "Unsupported URL: ..." when the URL is not an embed URL.
     */
    async embeds_default(url) {
        const m = scribdRegex.EMBED.exec(url);
        if (!m) {
            throw new Error(`Unsupported URL: ${url}`);
        }
        const id = m[1];

        // navigate to scribd
        const page = await puppeteerSg.getPage(url);
        try {
            // wait rendering
            await sleep(INITIAL_RENDER_DELAY_MS);

            // load all pages
            const docPages = await page.$$(PAGE_SELECTOR);
            const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
            bar.start(docPages.length, 0);
            try {
                for (let i = 0; i < docPages.length; i++) {
                    // Runs inside the browser: must not reference module-level helpers.
                    await page.evaluate((i) => {
                        // eslint-disable-next-line
                        const el = document.getElementById(`outer_page_${(i + 1)}`);
                        el.scrollIntoView();
                        el.style.margin = 0;
                    }, i);
                    await sleep(rendertime);
                    bar.update(i + 1);
                }
            } finally {
                // Always stop the bar, including when a page fails.
                bar.stop();
            }

            // pdf setting
            const options = {
                path: `${output}/${id}.pdf`,
                printBackground: true,
            };
            const firstPage = await page.$(PAGE_SELECTOR);
            const style = firstPage ? await firstPage.evaluate((el) => el.getAttribute("style")) : null;
            const size = parsePageSize(style);
            if (size) {
                // Use the first page's size as the PDF page size.
                options.height = size.height;
                options.width = size.width;
            }

            // show doc only
            await page.evaluate(() => {
                // eslint-disable-next-line
                document.body.innerHTML = document.querySelector("div.outer_page_container").innerHTML;
            });

            await directoryIo.create(path.dirname(options.path));
            await page.pdf(options);
            console.log(`Generated: ${options.path}`);
        } finally {
            // Close the page and browser session on success and on failure.
            await closeBrowser(page);
        }
    }

    /**
     * Image mode: screenshots every page into a temporary directory, combines the
     * images into `${output}/${id}.pdf`, then removes the temporary directory.
     * @param {string} url - Scribd embed URL.
     * @returns {Promise<void>}
     * @throws {Error} "Unsupported URL: ..." when the URL is not an embed URL.
     */
    async embeds_image(url) {
        const deviceScaleFactor = 2;
        const m = scribdRegex.EMBED.exec(url);
        if (!m) {
            throw new Error(`Unsupported URL: ${url}`);
        }
        const id = m[1];

        // prepare temp dir
        const dir = `${output}/${id}`;
        await directoryIo.create(dir);

        // navigate to scribd
        const page = await puppeteerSg.getPage(url);
        try {
            // wait rendering
            await sleep(INITIAL_RENDER_DELAY_MS);

            // hide blockers (skipped if the elements are not present)
            const docContainer = await page.$("div.document_scroller");
            if (docContainer) {
                await docContainer.evaluate((el) => {
                    el.style.bottom = "0px";
                    el.style.marginTop = "0px";
                });
            }
            const docToolbar = await page.$("div.toolbar_drop");
            if (docToolbar) {
                await docToolbar.evaluate((el) => {
                    el.style.display = "none";
                });
            }

            // download images
            const docPages = await page.$$(PAGE_SELECTOR);
            const images = [];
            // Fixed viewport width; height defaults to 1684 (roughly a 1:sqrt(2) ratio)
            // unless the page's inline style supplies its own proportions.
            const width = 1191;
            const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
            bar.start(docPages.length, 0);
            try {
                for (let i = 0; i < docPages.length; i++) {
                    await page.evaluate((i) => {
                        // eslint-disable-next-line
                        document.getElementById(`outer_page_${(i + 1)}`).scrollIntoView();
                    }, i);

                    let height = 1684;
                    const style = await docPages[i].evaluate((el) => el.getAttribute("style"));
                    const size = parsePageSize(style);
                    if (size) {
                        // Keep the page's aspect ratio at the fixed viewport width.
                        height = Math.ceil(width * size.height / size.width);
                    }
                    await page.setViewport({ width: width, height: height, deviceScaleFactor: deviceScaleFactor });

                    // Named imagePath so it does not shadow the imported `path` module.
                    const imagePath = `${dir}/${String(i + 1).padStart(4, "0")}.png`;
                    await docPages[i].screenshot({ path: imagePath });
                    const metadata = await sharp(imagePath).metadata();
                    images.push(new Image(imagePath, metadata.width, metadata.height));
                    bar.update(i + 1);
                }
            } finally {
                bar.stop();
            }

            // generate pdf
            await pdfGenerator.generate(images, `${output}/${id}.pdf`);
            // remove temp dir (awaited so a failure surfaces to the caller)
            await directoryIo.remove(dir);
        } finally {
            // Close the page and browser session on success and on failure.
            await closeBrowser(page);
        }
    }
}
/** Module-level ScribdDownloader instance exported for callers. */
export const scribdDownloader = new ScribdDownloader()