|
import cliProgress from "cli-progress" |
|
import { puppeteerSg } from "../utils/request/PuppeteerSg.js"; |
|
import { pdfGenerator } from "../utils/io/PdfGenerator.js"; |
|
import { configLoader } from "../utils/io/ConfigLoader.js"; |
|
import { directoryIo } from "../utils/io/DirectoryIo.js" |
|
import * as scribdRegex from "../const/ScribdRegex.js" |
|
import * as scribdFlag from '../const/ScribdFlag.js' |
|
import { Image } from "../object/Image.js" |
|
import sharp from "sharp"; |
|
import path from 'path' |
|
|
|
|
|
// Base directory that generated PDFs (and temporary page images) are written under.
const output = configLoader.load("DIRECTORY", "output");

// Delay, in milliseconds, waited after scrolling each page into view.
// Parsed explicitly as base 10 so the configured value is never read in another radix.
const rendertime = Number.parseInt(configLoader.load("SCRIBD", "rendertime"), 10);
|
|
|
// CSS selector matching every page element of the embedded document viewer.
const PAGE_SELECTOR = "div.outer_page_container div[id^='outer_page_']";

// Fixed pause (ms) after the page is obtained, before the DOM is queried.
const PAGE_LOAD_DELAY_MS = 1000;

// Viewport used for IMAGE mode screenshots. The height is only a fallback for
// pages whose inline style carries no usable width/height.
const IMAGE_VIEWPORT_WIDTH = 1191;
const IMAGE_FALLBACK_HEIGHT = 1684;
const IMAGE_SCALE_FACTOR = 2;

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Reads the pixel width and height out of an inline style string such as
 * `"width: 902px; height: 1166px"`.
 *
 * @param {string|null} style - Value of an element's `style` attribute.
 * @returns {{width: number, height: number}|null} Positive integer dimensions,
 *   or `null` when the attribute is missing or either value is unusable.
 */
function parsePageSize(style) {
    if (typeof style !== "string" || !style.includes("width:") || !style.includes("height:")) {
        return null;
    }
    const width = Number.parseInt(style.split("width:")[1].split("px")[0].trim(), 10);
    const height = Number.parseInt(style.split("height:")[1].split("px")[0].trim(), 10);
    // Rejects NaN, zero and negative values in one comparison each.
    if (!(width > 0) || !(height > 0)) {
        return null;
    }
    return { width, height };
}

/**
 * Closes the page and then the shared browser wrapper. The wrapper is closed
 * even when closing the page itself throws.
 *
 * @param {object} page - Page object returned by `puppeteerSg.getPage`.
 * @returns {Promise<void>}
 */
async function releaseBrowser(page) {
    try {
        await page.close();
    } finally {
        await puppeteerSg.close();
    }
}

/**
 * Downloads Scribd documents as PDF files, either by printing the embedded
 * viewer directly (default mode) or by screenshotting every page and
 * assembling the images (image mode).
 *
 * Constructing the class always yields the same shared instance.
 */
class ScribdDownloader {
    constructor() {
        if (!ScribdDownloader.instance) {
            ScribdDownloader.instance = this;
        }
        return ScribdDownloader.instance;
    }

    /**
     * Entry point: picks the download mode from `flag` and normalizes `url`
     * to an embed URL before delegating.
     *
     * @param {string} url - A URL matching `scribdRegex.DOCUMENT` or `scribdRegex.EMBED`.
     * @param {*} flag - `scribdFlag.IMAGE` selects image mode; anything else selects default mode.
     * @returns {Promise<void>}
     * @throws {Error} When `url` matches neither supported pattern.
     */
    async execute(url, flag) {
        const isImageMode = flag === scribdFlag.IMAGE;
        console.log(isImageMode ? `Mode: IMAGE` : `Mode: DEFAULT`);

        // Call through `this` so the methods keep their receiver instead of
        // being detached into a bare function reference.
        const download = (embedUrl) =>
            isImageMode ? this.embeds_image(embedUrl) : this.embeds_default(embedUrl);

        if (typeof url !== "string") {
            throw new Error(`Unsupported URL: ${url}`);
        }

        if (url.match(scribdRegex.DOCUMENT)) {
            await download(`https://www.scribd.com/embeds/${scribdRegex.DOCUMENT.exec(url)[2]}/content`);
        } else if (url.match(scribdRegex.EMBED)) {
            await download(url);
        } else {
            throw new Error(`Unsupported URL: ${url}`);
        }
    }

    /**
     * Default mode: scrolls through every page of the embedded viewer, then
     * prints the page container to `${output}/${id}.pdf`.
     *
     * @param {string} url - Embed URL; capture group 1 of `scribdRegex.EMBED` is the document id.
     * @returns {Promise<void>}
     * @throws {Error} When `url` is not an embed URL or no page elements are found.
     */
    async embeds_default(url) {
        const m = scribdRegex.EMBED.exec(url);
        if (!m) {
            throw new Error(`Unsupported URL: ${url}`);
        }
        const id = m[1];

        const page = await puppeteerSg.getPage(url);
        try {
            await sleep(PAGE_LOAD_DELAY_MS);

            const docPages = await page.$$(PAGE_SELECTOR);
            if (docPages.length === 0) {
                throw new Error(`No pages found in document: ${url}`);
            }

            const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
            bar.start(docPages.length, 0);
            try {
                for (let i = 0; i < docPages.length; i++) {
                    // Use the handle we already hold rather than rebuilding the element id.
                    await docPages[i].evaluate((el) => {
                        el.scrollIntoView();
                        el.style.margin = "0";
                    });
                    await sleep(rendertime);
                    bar.update(i + 1);
                }
            } finally {
                bar.stop();
            }

            const options = {
                path: `${output}/${id}.pdf`,
                printBackground: true,
            };
            // Size the PDF pages after the first document page when its inline
            // style provides dimensions; otherwise leave the printer defaults.
            const firstPageStyle = await docPages[0].evaluate((el) => el.getAttribute("style"));
            const size = parsePageSize(firstPageStyle);
            if (size) {
                options.height = size.height;
                options.width = size.width;
            }

            // Replace the whole body with the page container's content so only
            // the document pages end up in the PDF.
            await page.evaluate(() => {
                document.body.innerHTML = document.querySelector("div.outer_page_container").innerHTML;
            });

            await directoryIo.create(path.dirname(options.path));
            await page.pdf(options);
            console.log(`Generated: ${options.path}`);
        } finally {
            // Always release the page and browser, including on failure.
            await releaseBrowser(page);
        }
    }

    /**
     * Image mode: screenshots every page into `${output}/${id}/NNNN.png`,
     * builds `${output}/${id}.pdf` from those images via `pdfGenerator`, then
     * removes the image directory.
     *
     * @param {string} url - Embed URL; capture group 1 of `scribdRegex.EMBED` is the document id.
     * @returns {Promise<void>}
     * @throws {Error} When `url` is not an embed URL.
     */
    async embeds_image(url) {
        const m = scribdRegex.EMBED.exec(url);
        if (!m) {
            throw new Error(`Unsupported URL: ${url}`);
        }
        const id = m[1];

        const dir = `${output}/${id}`;
        await directoryIo.create(dir);

        const page = await puppeteerSg.getPage(url);
        try {
            await sleep(PAGE_LOAD_DELAY_MS);

            // Layout tweaks are skipped when the elements are absent.
            const scroller = await page.$("div.document_scroller");
            if (scroller) {
                await scroller.evaluate((el) => {
                    el.style["bottom"] = "0px";
                    el.style["margin-top"] = "0px";
                });
            }
            const toolbar = await page.$("div.toolbar_drop");
            if (toolbar) {
                await toolbar.evaluate((el) => {
                    el.style["display"] = "none";
                });
            }

            const docPages = await page.$$(PAGE_SELECTOR);
            const images = [];
            const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
            bar.start(docPages.length, 0);
            try {
                for (let i = 0; i < docPages.length; i++) {
                    const pageHandle = docPages[i];
                    await pageHandle.evaluate((el) => {
                        el.scrollIntoView();
                    });

                    // Keep the viewport width fixed and scale the height to the
                    // page's own aspect ratio when its style exposes one.
                    const size = parsePageSize(await pageHandle.evaluate((el) => el.getAttribute("style")));
                    const height = size
                        ? Math.ceil(IMAGE_VIEWPORT_WIDTH * size.height / size.width)
                        : IMAGE_FALLBACK_HEIGHT;
                    await page.setViewport({
                        width: IMAGE_VIEWPORT_WIDTH,
                        height: height,
                        deviceScaleFactor: IMAGE_SCALE_FACTOR,
                    });

                    // Named imagePath so the imported `path` module is not shadowed.
                    const imagePath = `${dir}/${String(i + 1).padStart(4, "0")}.png`;
                    await pageHandle.screenshot({ path: imagePath });

                    const metadata = await sharp(imagePath).metadata();
                    images.push(new Image(imagePath, metadata.width, metadata.height));
                    bar.update(i + 1);
                }
            } finally {
                bar.stop();
            }

            await pdfGenerator.generate(images, `${output}/${id}.pdf`);

            // Awaited so a failed cleanup surfaces here; harmless if remove() is synchronous.
            await directoryIo.remove(dir);
        } finally {
            // Always release the page and browser, including on failure.
            await releaseBrowser(page);
        }
    }
}

// Shared downloader instance used by the rest of the application.
export const scribdDownloader = new ScribdDownloader();