import express from 'express';
import axios from 'axios';
import { Readability } from '@mozilla/readability';
import { JSDOM } from 'jsdom';
import TurndownService from 'turndown';

// Upper bound, in milliseconds, for a single upstream page fetch. axios applies
// no timeout unless one is configured, so a stalled host would otherwise keep
// the incoming request open indefinitely.
const FETCH_TIMEOUT_MS = 15000;

// Shared HTML -> Markdown converter: ATX (`#`) headings and fenced code blocks.
const turndownService = new TurndownService({
  headingStyle: 'atx',
  codeBlockStyle: 'fenced',
});

/**
 * Parses a caller-supplied value as an absolute http(s) URL.
 *
 * @param {unknown} value - Raw value taken from the request body.
 * @returns {URL | null} The parsed URL, or `null` when `value` is not a string,
 *   is not a parseable absolute URL, or uses a scheme other than http/https.
 */
function toHttpUrl(value) {
  if (typeof value !== 'string') {
    return null;
  }

  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    return null;
  }

  const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
  return isHttp ? parsed : null;
}

/**
 * Downloads a web page and extracts its main article as Markdown.
 */
class WebsiteParser {
  /**
   * Fetches `url`, runs Readability over the returned document and converts
   * the extracted article HTML to Markdown.
   *
   * @param {string} url - Address of the page to fetch; also passed to JSDOM
   *   as the document URL.
   * @returns {Promise<{title: *, content: string, excerpt: *, byline: *, siteName: *}>}
   *   Article fields as reported by Readability, with `content` converted to
   *   Markdown.
   * @throws {Error} When the fetch fails, times out, or Readability cannot
   *   extract an article. The underlying error is attached as `cause`.
   */
  async fetchAndParse(url) {
    try {
      const response = await axios.get(url, {
        headers: { 'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)' },
        timeout: FETCH_TIMEOUT_MS,
      });

      const dom = new JSDOM(response.data, { url });
      const article = new Readability(dom.window.document).parse();
      if (!article) {
        throw new Error('не удалось спарсить страницу');
      }

      return {
        title: article.title,
        content: turndownService.turndown(article.content),
        excerpt: article.excerpt,
        byline: article.byline,
        siteName: article.siteName,
      };
    } catch (error) {
      // Keep the original error reachable for logging/debugging instead of
      // flattening it to its message only.
      throw new Error(`ошибка парсинга или получения страницы: ${error.message}`, {
        cause: error,
      });
    }
  }
}

const app = express();
const PORT = process.env.PORT || 7860;
const parser = new WebsiteParser();

app.use(express.json());

/**
 * POST /parse
 * Body: `{ "url": "<page address>" }`.
 * Responds with `{ title, content, metadata: { excerpt, byline, siteName } }`,
 * 400 when the URL is missing or not a valid http(s) URL, and 500 when
 * fetching or parsing fails.
 */
app.post('/parse', async (req, res) => {
  try {
    // `req.body` may be undefined when no JSON body was parsed; fall back to an
    // empty object so a missing body yields 400 rather than a TypeError -> 500.
    const { url } = req.body ?? {};
    if (!url) {
      return res.status(400).json({ error: 'необходимо указать URL!' });
    }

    // The URL is untrusted input: reject non-strings, malformed URLs and
    // non-http(s) schemes before any network request is made.
    // NOTE(review): hosts that resolve to private/internal addresses are NOT
    // blocked here; add an allow/deny list if this service can reach internal
    // networks.
    if (!toHttpUrl(url)) {
      return res
        .status(400)
        .json({ error: 'некорректный URL: поддерживаются только http и https' });
    }

    const { title, content, excerpt, byline, siteName } = await parser.fetchAndParse(url);
    res.json({
      title,
      content,
      metadata: { excerpt, byline, siteName },
    });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// GET / — replies with an example curl invocation for the /parse endpoint.
app.get('/', (req, res) => {
  res.send(`curl -X POST https://prolapse-read.hf.space/parse -H "Content-Type: application/json" -d '{"url": "https://habr.com/ru/companies/serverspace/articles/869252/"}'`);
});

app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});