File size: 2,924 Bytes
1c92863
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
const axios = require('axios');
const cheerio = require('cheerio');
const { spawn } = require('child_process');
const fs = require('fs');
const TurndownService = require('turndown');

// Shared Turndown instance; getEssayText passes each essay's <body> HTML
// through its turndown() method before adding it to the combined text.
const turndownService = new TurndownService();

// Index page that getEssayLinks downloads and scans for links to essays.
const essaysListUrl = 'http://www.paulgraham.com/articles.html';

/**
 * Runs the whole pipeline: collects the essay links, downloads every essay,
 * writes the concatenated text to disk and converts that file to an EPUB.
 *
 * Any error raised along the way is logged to the console and not rethrown,
 * so the returned promise always resolves.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const textFile = 'paul_graham_essays.txt';
    const outputFile = 'paul_graham_essays.epub';

    try {
        const essayUrls = await getEssayLinks();
        // Reversed in place so essays are processed in the opposite order
        // from the one in which their links appear on the index page.
        essayUrls.reverse();

        // Start every download at once; Promise.all keeps the results in
        // the same order as essayUrls.
        const pendingTexts = essayUrls.map((essayUrl) => getEssayText(essayUrl));
        const essayTexts = await Promise.all(pendingTexts);
        const combinedText = essayTexts.join('\n\n');

        console.log('Combined text:', combinedText);
        fs.writeFileSync(textFile, combinedText);

        await generateEpub(textFile, outputFile);
        console.log(`EPUB file generated: ${outputFile}`);
    } catch (error) {
        console.error('Error:', error);
    }
}

/**
 * Converts a file to EPUB by running the external `pandoc` command with a
 * one-level table of contents and a fixed title.
 *
 * pandoc's stdout and stderr are echoed to the console as they arrive.
 *
 * @param {string} inputFile - Path of the file handed to pandoc as input.
 * @param {string} outputFile - Path passed to pandoc's `-o` option.
 * @returns {Promise<void>} Resolves when pandoc exits with code 0. Rejects
 *   when pandoc cannot be started (for example, it is not installed), exits
 *   with a non-zero code, or is terminated by a signal.
 */
function generateEpub(inputFile, outputFile) {
    return new Promise((resolve, reject) => {
        const pandoc = spawn('pandoc', [
            inputFile,
            '-o',
            outputFile,
            '--toc',
            '--toc-depth=1',
            '--metadata',
            // spawn() does not go through a shell, so the value must not be
            // wrapped in quotes: they would be passed to pandoc literally
            // and become part of the title.
            'title=Paul Graham Essays'
        ]);

        // Without this listener a failed spawn raises an unhandled 'error'
        // event (crashing the process) and the promise never settles.
        pandoc.on('error', reject);

        pandoc.stdout.on('data', (data) => {
            console.log(`stdout: ${data}`);
        });

        pandoc.stderr.on('data', (data) => {
            console.error(`stderr: ${data}`);
        });

        pandoc.on('close', (code, signal) => {
            if (code === 0) {
                resolve();
            } else if (code === null && signal) {
                // The exit code is null when the child was killed by a signal.
                reject(new Error(`pandoc was terminated by signal ${signal}`));
            } else {
                reject(new Error(`pandoc exited with code ${code}`));
            }
        });
    });
}
// Base against which hrefs found on the index page are resolved.
const baseUrl = 'http://www.paulgraham.com/';

/**
 * Downloads the essay index page and collects every link whose href ends in
 * `.html`, in the order the links appear on the page.
 *
 * Each href is resolved against `baseUrl` with the URL constructor, so
 * relative, root-relative and absolute hrefs all yield a valid absolute URL
 * (plain string concatenation only worked for relative ones).
 *
 * @returns {Promise<string[]>} Absolute URLs of the linked pages.
 * @throws Propagates any error raised while fetching the index page.
 */
async function getEssayLinks() {
    const response = await axios.get(essaysListUrl);
    const $ = cheerio.load(response.data);

    const links = [];

    $('a').each((_, link) => {
        const href = $(link).attr('href');
        if (!href || !href.endsWith('.html')) {
            return;
        }

        try {
            links.push(new URL(href, baseUrl).href);
        } catch (error) {
            // A single malformed href should not abort the whole run.
            console.error(`Skipping invalid essay link: ${href}\n${error}`);
        }
    });

    return links;
}

/**
 * Downloads one essay page and returns it as a chapter of text: a level-1
 * heading built from the page's <title>, followed by the page body run
 * through the shared Turndown service.
 *
 * Failures are best-effort: the error is logged and an empty string is
 * returned, so one bad page does not reject the caller's Promise.all.
 *
 * @param {string} url - Address of the essay page to download.
 * @returns {Promise<string>} The chapter text, or '' if anything failed.
 */
async function getEssayText(url) {
    try {
        const page = await axios.get(url);
        const $ = cheerio.load(page.data);

        // Strip scripts and images before the body is converted.
        $('script').remove();
        $('img').remove();

        const heading = $('title').text();
        const bodyHtml = $('body').html();
        const bodyText = turndownService.turndown(bodyHtml);

        return `\n\n# ${heading}\n\n${bodyText}`;
    } catch (error) {
        console.error(`Error fetching essay: ${url}\n${error}`);
        return '';
    }
}

// Script entry point. main() writes the combined text file and generates the
// EPUB itself, so nothing needs to run after it: invoking generateEpub again
// here would run pandoc a second time, and generateEpub returns a promise
// rather than accepting a completion callback.
main().catch((error) => {
    // Guard against an unhandled rejection should main() ever reject, and
    // signal the failure through the process exit code.
    console.error('Error:', error);
    process.exitCode = 1;
});