const axios = require('axios'); const cheerio = require('cheerio'); const { spawn } = require('child_process'); const fs = require('fs'); const TurndownService = require('turndown'); const turndownService = new TurndownService(); const essaysListUrl = 'http://www.paulgraham.com/articles.html'; async function main() { try { const essays = await getEssayLinks(); essays.reverse(); const essayTexts = await Promise.all(essays.map((essay) => getEssayText(essay))); const combinedText = essayTexts.join('\n\n'); console.log('Combined text:', combinedText); fs.writeFileSync('paul_graham_essays.txt', combinedText); const outputFile = 'paul_graham_essays.epub'; await generateEpub('paul_graham_essays.txt', outputFile); console.log(`EPUB file generated: ${outputFile}`); } catch (error) { console.error('Error:', error); } } function generateEpub(inputFile, outputFile) { return new Promise((resolve, reject) => { const pandoc = spawn('pandoc', [ inputFile, '-o', outputFile, '--toc', '--toc-depth=1', '--metadata', 'title="Paul Graham Essays"' ]); pandoc.stdout.on('data', (data) => { console.log(`stdout: ${data}`); }); pandoc.stderr.on('data', (data) => { console.error(`stderr: ${data}`); }); pandoc.on('close', (code) => { if (code === 0) { resolve(); } else { reject(new Error(`pandoc exited with code ${code}`)); } }); }); } const baseUrl = 'http://www.paulgraham.com/'; async function getEssayLinks() { const response = await axios.get(essaysListUrl); const $ = cheerio.load(response.data); const links = []; $('a').each((_, link) => { const url = $(link).attr('href'); if (url && url.endsWith('.html')) { links.push(baseUrl + url); } }); return links; } async function getEssayText(url) { try { const response = await axios.get(url); const $ = cheerio.load(response.data); $('script').remove(); $('img').remove(); const title = $('title').text(); const htmlContent = $('body').html(); const markdownContent = turndownService.turndown(htmlContent); return `\n\n# ${title}\n\n${markdownContent}`; } catch (error) { console.error(`Error fetching essay: ${url}\n${error}`); return ''; } } main().then(() => { const outputFile = 'paul_graham_essays.epub'; generateEpub('paul_graham_essays.txt', outputFile, (error) => { if (error) { console.error('Error generating EPUB file:', error); return; } console.log(`EPUB file generated: ${outputFile}`); }); });