File size: 2,154 Bytes
2964f16
a4421f0
 
 
 
 
53a8bc7
a4421f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101262d
a4421f0
 
 
 
 
 
 
 
 
 
 
 
53a8bc7
a4421f0
 
 
 
b44a9de
 
a4421f0
 
b44a9de
a4421f0
b44a9de
a4421f0
b44a9de
 
 
53a8bc7
b44a9de
a4421f0
b44a9de
 
 
 
 
 
 
 
 
 
 
a4421f0
b44a9de
 
 
a4421f0
 
 
2964f16
53a8bc7
2964f16
 
 
 
a4421f0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import express from 'express';
import axios from 'axios';
import { Readability } from '@mozilla/readability';
import { JSDOM } from 'jsdom';
import TurndownService from 'turndown';


// Shared HTML-to-Markdown converter used by WebsiteParser.
// 'atx' selects "#"-prefixed headings and 'fenced' selects ``` code fences.
const turndownService = new TurndownService({
  codeBlockStyle: 'fenced',
  headingStyle: 'atx'
});

/**
 * Downloads a web page and extracts its main article as Markdown.
 *
 * The page is fetched with axios, loaded into a JSDOM document, reduced to
 * its readable article by Readability, and the article HTML is converted to
 * Markdown by the module-level `turndownService`.
 */
class WebsiteParser {
  /** Request timeout in milliseconds handed to axios. */
  #timeoutMs;

  /**
   * @param {object} [options]
   * @param {number} [options.timeoutMs=15000] - How long to wait for the
   *   remote server before axios aborts the request.
   */
  constructor({ timeoutMs = 15000 } = {}) {
    this.#timeoutMs = timeoutMs;
  }

  /**
   * Parses `rawUrl` and makes sure it is an absolute http(s) URL.
   *
   * @param {unknown} rawUrl - Caller-supplied value; stringified by `URL`.
   * @returns {URL} The parsed URL.
   * @throws {Error} If the value is not a valid URL or uses another scheme.
   */
  static #toHttpUrl(rawUrl) {
    let parsed;
    try {
      parsed = new URL(rawUrl);
    } catch (error) {
      throw new Error('некорректный URL', { cause: error });
    }

    // Only web pages are meaningful here; this rejects file:, data:, ftp:, etc.
    // NOTE(review): this does not block http(s) URLs that point at internal
    // hosts — add an allow/deny list if the service is exposed publicly.
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      throw new Error('поддерживаются только http и https URL');
    }

    return parsed;
  }

  /**
   * Fetches `url` and returns its readable content.
   *
   * @param {string} url - Absolute http(s) URL of the page to parse.
   * @returns {Promise<{title: string, content: string, excerpt: string, byline: string, siteName: string}>}
   *   Article fields reported by Readability; `content` is Markdown.
   * @throws {Error} On an invalid URL, a failed or timed-out request, or when
   *   Readability cannot extract an article. The underlying error is
   *   available as `error.cause`.
   */
  async fetchAndParse(url) {
    try {
      const target = WebsiteParser.#toHttpUrl(url);

      const response = await axios.get(target.href, {
        // Without a timeout a stalled server would keep the request pending forever.
        timeout: this.#timeoutMs,
        headers: {
          'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)'
        }
      });

      // Passing the page URL lets JSDOM resolve relative links against it.
      const dom = new JSDOM(response.data, { url: target.href });
      const document = dom.window.document;

      const reader = new Readability(document);
      const article = reader.parse();

      if (!article) {
        throw new Error('не удалось спарсить страницу');
      }

      const markdown = turndownService.turndown(article.content);

      return {
        title: article.title,
        content: markdown,
        excerpt: article.excerpt,
        byline: article.byline,
        siteName: article.siteName
      };
    } catch (error) {
      // Keep the original error reachable for logging/debugging via `cause`.
      throw new Error(`ошибка парсинга или получения страницы: ${error.message}`, { cause: error });
    }
  }
}

// Single parser instance shared by all requests.
const parser = new WebsiteParser();

// Port to listen on: the PORT environment variable when set, otherwise 7860.
const PORT = process.env.PORT || 7860;

// Express application with JSON request-body parsing enabled.
const app = express();
app.use(express.json());

/**
 * POST /parse — fetches the page named by `url` in the JSON body and responds
 * with its title, Markdown content and metadata.
 *
 * Responses:
 *   200 { title, content, metadata: { excerpt, byline, siteName } }
 *   400 { error } when the body carries no non-empty string `url`
 *   500 { error } when fetching or parsing the page fails
 */
app.post('/parse', async (req, res) => {
  try {
    // req.body may be undefined when no body parser handled the request
    // (e.g. a non-JSON Content-Type); treat that as "no URL" rather than
    // letting the destructuring throw and surface as a 500.
    const { url } = req.body ?? {};

    // Anything other than a non-blank string is a client error.
    if (typeof url !== 'string' || url.trim() === '') {
      return res.status(400).json({ error: 'необходимо указать URL!' });
    }

    const { title, content, excerpt, byline, siteName } = await parser.fetchAndParse(url);

    res.json({
      title,
      content,
      metadata: {
        excerpt,
        byline,
        siteName
      }
    });
  } catch (error) {
    res.status(500).json({
      error: error.message
    });
  }
});

// GET / — replies with an example curl command that calls POST /parse.
app.get('/', (_request, response) => {
  const usageExample = `curl -X POST https://prolapse-read.hf.space/parse -H "Content-Type: application/json" -d '{"url": "https://habr.com/ru/companies/serverspace/articles/869252/"}'`;
  response.send(usageExample);
});

// Start the HTTP server on PORT; the callback logs the port in use.
app.listen(PORT, function onListening() {
  console.log(`Server running on port ${PORT}`);
});