File size: 2,154 Bytes
2964f16 a4421f0 53a8bc7 a4421f0 101262d a4421f0 53a8bc7 a4421f0 b44a9de a4421f0 b44a9de a4421f0 b44a9de a4421f0 b44a9de 53a8bc7 b44a9de a4421f0 b44a9de a4421f0 b44a9de a4421f0 2964f16 53a8bc7 2964f16 a4421f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import express from 'express';
import axios from 'axios';
import { Readability } from '@mozilla/readability';
import { JSDOM } from 'jsdom';
import TurndownService from 'turndown';
// Options for the shared HTML-to-Markdown converter: ATX-style headings and
// fenced code blocks.
const markdownOptions = {
  codeBlockStyle: 'fenced',
  headingStyle: 'atx',
};

// Single converter instance reused for every parsed page.
const turndownService = new TurndownService(markdownOptions);
/**
 * Downloads a web page and converts its main article to Markdown.
 *
 * The HTML is fetched with axios, loaded into a JSDOM document, reduced to the
 * article with Readability and converted with the module-level
 * `turndownService`.
 */
class WebsiteParser {
  /**
   * @param {object} [options]
   * @param {number} [options.timeoutMs=15000] - Upper bound, in milliseconds,
   *   for the HTTP request that downloads the page.
   */
  constructor({ timeoutMs = 15000 } = {}) {
    this.timeoutMs = timeoutMs;
  }

  /**
   * Fetches `url` and extracts the readable article from the response.
   *
   * @param {string} url - Address of the page to download; also passed to
   *   JSDOM as the document URL.
   * @returns {Promise<{title: *, content: string, excerpt: *, byline: *, siteName: *}>}
   *   Article metadata as reported by Readability plus the Markdown content.
   * @throws {Error} When the download fails or no article can be extracted.
   *   The original error is available as `cause`.
   */
  async fetchAndParse(url) {
    try {
      const response = await axios.get(url, {
        headers: {
          'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)',
        },
        // Without a timeout a stalled upstream server would keep this call
        // (and the HTTP request waiting on it) pending indefinitely.
        timeout: this.timeoutMs,
      });

      const dom = new JSDOM(response.data, { url });
      let article;
      try {
        article = new Readability(dom.window.document).parse();
      } finally {
        // The parse result is all that is used from here on, so the window
        // can be closed before the Markdown conversion.
        dom.window.close();
      }

      if (!article) {
        throw new Error('не удалось спарсить страницу');
      }

      return {
        title: article.title,
        content: turndownService.turndown(article.content),
        excerpt: article.excerpt,
        byline: article.byline,
        siteName: article.siteName,
      };
    } catch (error) {
      // Keep the original error reachable for debugging while preserving the
      // message format callers already receive.
      throw new Error(
        `ошибка парсинга или получения страницы: ${error.message}`,
        { cause: error },
      );
    }
  }
}
// Port comes from the environment when provided, otherwise 7860 is used.
const PORT = process.env.PORT || 7860;

// One parser instance serves every request.
const parser = new WebsiteParser();

// Express application with JSON request-body parsing enabled.
const app = express();
app.use(express.json());
/**
 * Reports whether `value` is a string holding an absolute http: or https: URL.
 *
 * @param {*} value - Candidate taken from the request body.
 * @returns {boolean} True only for parseable http(s) URL strings.
 */
function isHttpUrl(value) {
  if (typeof value !== 'string') {
    return false;
  }
  try {
    const { protocol } = new URL(value);
    return protocol === 'http:' || protocol === 'https:';
  } catch (error) {
    // `new URL` throws on input it cannot parse.
    return false;
  }
}

/**
 * POST /parse — body `{ "url": "<page address>" }`.
 *
 * Responds with `{ title, content, metadata: { excerpt, byline, siteName } }`
 * on success, 400 when the URL is missing or not an http(s) URL, and 500 with
 * the error message when fetching or parsing fails.
 */
app.post('/parse', async (req, res) => {
  // req.body may be absent when no JSON payload was parsed; treat that the
  // same as a missing URL instead of failing on the destructuring.
  const { url } = req.body || {};
  if (!url) {
    return res.status(400).json({ error: 'необходимо указать URL!' });
  }

  // The URL is caller-controlled and is fetched server-side, so only plain
  // web schemes are accepted.
  // NOTE(review): this does not stop requests to internal/private hosts —
  // add host filtering if this service can reach sensitive networks.
  if (!isHttpUrl(url)) {
    return res.status(400).json({ error: 'URL должен быть корректным http(s)-адресом' });
  }

  try {
    const result = await parser.fetchAndParse(url);
    res.json({
      title: result.title,
      content: result.content,
      metadata: {
        excerpt: result.excerpt,
        byline: result.byline,
        siteName: result.siteName,
      },
    });
  } catch (error) {
    res.status(500).json({
      error: error.message,
    });
  }
});
// Example invocation of the /parse endpoint, shown to anyone opening the root URL.
const USAGE_EXAMPLE = `curl -X POST https://prolapse-read.hf.space/parse -H "Content-Type: application/json" -d '{"url": "https://habr.com/ru/companies/serverspace/articles/869252/"}'`;

// GET / — replies with the usage example.
app.get('/', (req, res) => {
  res.send(USAGE_EXAMPLE);
});
// Logs the port once the server has started listening.
const announceStartup = () => {
  console.log(`Server running on port ${PORT}`);
};

// Start accepting HTTP connections on the configured port.
app.listen(PORT, announceStartup);
|