2ch committed on
Commit
53a8bc7
·
verified ·
1 Parent(s): 5dea88e

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +4 -10
server.js CHANGED
@@ -4,7 +4,7 @@ import { Readability } from '@mozilla/readability';
4
  import { JSDOM } from 'jsdom';
5
  import TurndownService from 'turndown';
6
 
7
- // Initialize HTML to Markdown converter
8
  const turndownService = new TurndownService({
9
  headingStyle: 'atx',
10
  codeBlockStyle: 'fenced'
@@ -13,18 +13,15 @@ const turndownService = new TurndownService({
13
  class WebsiteParser {
14
  async fetchAndParse(url) {
15
  try {
16
- // Fetch the webpage
17
  const response = await axios.get(url, {
18
  headers: {
19
  'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)'
20
  }
21
  });
22
 
23
- // Create a DOM from the HTML
24
  const dom = new JSDOM(response.data, { url });
25
  const document = dom.window.document;
26
 
27
- // Use Readability to extract main content
28
  const reader = new Readability(document);
29
  const article = reader.parse();
30
 
@@ -32,7 +29,6 @@ class WebsiteParser {
32
  throw new Error('Failed to parse content');
33
  }
34
 
35
- // Convert HTML to Markdown
36
  const markdown = turndownService.turndown(article.content);
37
 
38
  return {
@@ -43,7 +39,7 @@ class WebsiteParser {
43
  siteName: article.siteName
44
  };
45
  } catch (error) {
46
- throw new Error(`Failed to fetch or parse content: ${error.message}`);
47
  }
48
  }
49
  }
@@ -52,16 +48,14 @@ const app = express();
52
  const PORT = process.env.PORT || 7860;
53
  const parser = new WebsiteParser();
54
 
55
- // Парсинг JSON тела запроса
56
  app.use(express.json());
57
 
58
- // Эндпоинт для парсинга URL
59
  app.post('/parse', async (req, res) => {
60
  try {
61
  const { url } = req.body;
62
 
63
  if (!url) {
64
- return res.status(400).json({ error: 'URL is required' });
65
  }
66
 
67
  const result = await parser.fetchAndParse(url);
@@ -83,7 +77,7 @@ app.post('/parse', async (req, res) => {
83
  });
84
 
85
  app.get('/', (req, res) => {
86
- res.send('Website Parser Service is running');
87
  });
88
 
89
  app.listen(PORT, () => {
 
4
  import { JSDOM } from 'jsdom';
5
  import TurndownService from 'turndown';
6
 
7
+
8
  const turndownService = new TurndownService({
9
  headingStyle: 'atx',
10
  codeBlockStyle: 'fenced'
 
13
  class WebsiteParser {
14
  async fetchAndParse(url) {
15
  try {
 
16
  const response = await axios.get(url, {
17
  headers: {
18
  'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)'
19
  }
20
  });
21
 
 
22
  const dom = new JSDOM(response.data, { url });
23
  const document = dom.window.document;
24
 
 
25
  const reader = new Readability(document);
26
  const article = reader.parse();
27
 
 
29
  throw new Error('Failed to parse content');
30
  }
31
 
 
32
  const markdown = turndownService.turndown(article.content);
33
 
34
  return {
 
39
  siteName: article.siteName
40
  };
41
  } catch (error) {
42
+ throw new Error(`ошибка парсинга или получения страницы: ${error.message}`);
43
  }
44
  }
45
  }
 
48
  const PORT = process.env.PORT || 7860;
49
  const parser = new WebsiteParser();
50
 
 
51
  app.use(express.json());
52
 
 
53
  app.post('/parse', async (req, res) => {
54
  try {
55
  const { url } = req.body;
56
 
57
  if (!url) {
58
+ return res.status(400).json({ error: 'необходимо указать URL!' });
59
  }
60
 
61
  const result = await parser.fetchAndParse(url);
 
77
  });
78
 
79
  app.get('/', (req, res) => {
80
+ res.send(`curl -X POST https://prolapse-read.hf.space/parse -H "Content-Type: application/json" -d '{"url": "https://habr.com/ru/companies/serverspace/articles/869252/"}'`);
81
  });
82
 
83
  app.listen(PORT, () => {