2ch committed on
Commit
a4421f0
·
verified ·
1 Parent(s): 829ccac

Create server.js

Browse files
Files changed (1) hide show
  1. server.js +130 -0
server.js ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
2
+ import { HttpServerTransport } from "@modelcontextprotocol/sdk/server/http.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { ErrorCode, McpError, ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
5
+ import axios from 'axios';
6
+ import { Readability } from '@mozilla/readability';
7
+ import { JSDOM } from 'jsdom';
8
+ import TurndownService from 'turndown';
9
+
10
// Shared HTML-to-Markdown converter: ATX-style ("#") headings and fenced code blocks.
const turndownOptions = {
  headingStyle: 'atx',
  codeBlockStyle: 'fenced',
};
const turndownService = new TurndownService(turndownOptions);
15
+
16
/**
 * Downloads a web page and reduces it to its main article content,
 * converted to Markdown.
 */
class WebsiteParser {
  /**
   * @param {object} [options]
   * @param {number} [options.timeoutMs=30000] - Milliseconds to wait for the
   *   HTTP request before it is aborted. Without a limit, a stalled remote
   *   host would keep the caller waiting forever.
   */
  constructor({ timeoutMs = 30000 } = {}) {
    this.timeoutMs = timeoutMs;
  }

  /**
   * Fetches `url`, extracts the readable article with Readability and
   * converts the extracted HTML to Markdown.
   *
   * @param {string} url - Address of the page to fetch; also passed to JSDOM
   *   as the document URL.
   * @returns {Promise<{title: *, content: string, excerpt: *, byline: *, siteName: *}>}
   *   The Markdown content plus the metadata fields reported by Readability.
   * @throws {Error} With a message prefixed by
   *   "Failed to fetch or parse content: " for any fetch or parse failure;
   *   the underlying error is attached as `cause`.
   */
  async fetchAndParse(url) {
    let dom;
    try {
      // Fetch the webpage
      const response = await axios.get(url, {
        timeout: this.timeoutMs,
        headers: {
          'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)'
        }
      });

      // Create a DOM from the HTML
      dom = new JSDOM(response.data, { url });

      // Use Readability to extract main content
      const article = new Readability(dom.window.document).parse();
      if (!article) {
        throw new Error('Failed to parse content');
      }

      return {
        title: article.title,
        // Convert the extracted HTML to Markdown
        content: turndownService.turndown(article.content),
        excerpt: article.excerpt,
        byline: article.byline,
        siteName: article.siteName
      };
    } catch (error) {
      // Keep the original error (and its stack) reachable through `cause`.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to fetch or parse content: ${reason}`, { cause: error });
    } finally {
      // Release the JSDOM window once the content has been extracted.
      dom?.window.close();
    }
  }
}
53
+
54
// MCP server instance: identifies itself by name/version and advertises the
// (initially empty) tools capability.
const serverInfo = {
  name: "server-readability-parser",
  version: "1.0.0",
};
const serverOptions = {
  capabilities: { tools: {} },
};
const server = new Server(serverInfo, serverOptions);

// Single parser instance used by the tool-call handler.
const parser = new WebsiteParser();
63
+
64
/**
 * Builds the descriptor of the single "parse" tool: its name, description and
 * the JSON schema of its input (one required string property, `url`).
 * A fresh object is created on every call.
 */
function buildParseToolDescriptor() {
  const urlProperty = {
    type: "string",
    description: "The website URL to parse",
  };

  return {
    name: "parse",
    description: "Extracts and transforms webpage content into clean, LLM-optimized Markdown. Returns article title, main content, excerpt, byline and site name. Uses Mozilla's Readability algorithm to remove ads, navigation, footers and non-essential elements while preserving the core content structure.",
    inputSchema: {
      type: "object",
      properties: { url: urlProperty },
      required: ["url"],
    },
  };
}

// Answer tool-listing requests with the one available tool.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return { tools: [buildParseToolDescriptor()] };
});
81
+
82
/**
 * Handles tool execution requests.
 *
 * Only the "parse" tool is supported. On success the result is a single text
 * item containing pretty-printed JSON with `title`, `content` and a
 * `metadata` object (`excerpt`, `byline`, `siteName`). Fetch/parse failures
 * are reported as a result with `isError: true` rather than thrown.
 *
 * Throws McpError(MethodNotFound) for any other tool name and
 * McpError(InvalidParams) when `url` is missing, blank or not a string.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  if (name !== "parse") {
    throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
  }

  // The input schema declares `url` as a string; a plain truthiness check
  // would let numbers, objects or whitespace-only strings reach the fetcher.
  const url = args?.url;
  if (typeof url !== "string" || url.trim() === "") {
    throw new McpError(
      ErrorCode.InvalidParams,
      "URL is required and must be a non-empty string"
    );
  }

  try {
    const result = await parser.fetchAndParse(url);
    const payload = {
      title: result.title,
      content: result.content,
      metadata: {
        excerpt: result.excerpt,
        byline: result.byline,
        siteName: result.siteName
      }
    };

    return {
      content: [{
        type: "text",
        text: JSON.stringify(payload, null, 2)
      }]
    };
  } catch (error) {
    // Report the failure to the client as a tool-level error result.
    return {
      isError: true,
      content: [{
        type: "text",
        text: `Error: ${error.message}`
      }]
    };
  }
});
121
+
122
// Start server: attach the MCP server to an HTTP transport configured with port 7860.
// NOTE(review): confirm that the installed @modelcontextprotocol/sdk version
// really exports HttpServerTransport from "server/http.js".
const transportOptions = { port: 7860 };
const transport = new HttpServerTransport(transportOptions);

// Log the failure and exit with a non-zero status if the connection cannot be established.
const handleStartupFailure = (error) => {
  console.error(`Server failed to start: ${error.message}`);
  process.exit(1);
};

server.connect(transport).catch(handleStartupFailure);