// File size: 3,567 Bytes | a4421f0
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { HttpServerTransport } from "@modelcontextprotocol/sdk/server/http.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { ErrorCode, McpError, ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
import axios from 'axios';
import { Readability } from '@mozilla/readability';
import { JSDOM } from 'jsdom';
import TurndownService from 'turndown';
// Shared HTML -> Markdown converter: ATX ("#") headings, fenced code blocks.
const turndownOptions = {
  headingStyle: 'atx',
  codeBlockStyle: 'fenced',
};
const turndownService = new TurndownService(turndownOptions);
/**
 * Fetches a web page and reduces it to its main article content as Markdown.
 */
class WebsiteParser {
  /**
   * @param {object} [options]
   * @param {number} [options.timeoutMs=30000] - Upper bound, in milliseconds,
   *   passed to the HTTP client so a stalled host cannot hang a call forever.
   */
  constructor({ timeoutMs = 30000 } = {}) {
    this.timeoutMs = timeoutMs;
  }

  /**
   * Downloads `url`, extracts the readable article with Readability and
   * converts the extracted HTML to Markdown.
   *
   * @param {string} url - Absolute http(s) URL of the page to parse.
   * @returns {Promise<{title: *, content: string, excerpt: *, byline: *, siteName: *}>}
   *   The article metadata reported by Readability plus the Markdown body.
   * @throws {Error} For any failure (invalid or non-http(s) URL, request
   *   error, unparseable page). The message is prefixed with
   *   "Failed to fetch or parse content: " and the underlying error is
   *   attached as `cause`.
   */
  async fetchAndParse(url) {
    let dom;
    try {
      // The URL comes from the tool caller, so reject anything that is not
      // a well-formed web address before handing it to the HTTP client.
      const target = new URL(url);
      if (target.protocol !== 'http:' && target.protocol !== 'https:') {
        throw new Error(`Unsupported URL protocol: ${target.protocol}`);
      }

      // Fetch the webpage
      const response = await axios.get(target.href, {
        headers: {
          'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)'
        },
        // Request the body as text so JSDOM is given markup, not a parsed object.
        responseType: 'text',
        timeout: this.timeoutMs
      });

      // Create a DOM from the HTML; the URL lets relative links resolve.
      dom = new JSDOM(response.data, { url: target.href });

      // Use Readability to extract main content
      const article = new Readability(dom.window.document).parse();
      if (!article) {
        throw new Error('Failed to parse content');
      }

      return {
        title: article.title,
        // Convert HTML to Markdown
        content: turndownService.turndown(article.content),
        excerpt: article.excerpt,
        byline: article.byline,
        siteName: article.siteName
      };
    } catch (error) {
      // Keep the original error reachable for debugging via `cause`.
      throw new Error(`Failed to fetch or parse content: ${error.message}`, { cause: error });
    } finally {
      // Release the jsdom window once the article has been extracted.
      dom?.window.close();
    }
  }
}
// Build the MCP server (advertising tool support only) and the parser
// instance that the tool handler delegates to.
const serverInfo = {
  name: "server-readability-parser",
  version: "1.0.0",
};
const serverOptions = {
  capabilities: { tools: {} },
};
const server = new Server(serverInfo, serverOptions);
const parser = new WebsiteParser();
// Advertise the tools this server offers: a single "parse" tool that takes
// one required string argument, `url`.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Built on every call so each response gets its own fresh descriptor object.
  const urlProperty = {
    type: "string",
    description: "The website URL to parse",
  };
  const parseTool = {
    name: "parse",
    description: "Extracts and transforms webpage content into clean, LLM-optimized Markdown. Returns article title, main content, excerpt, byline and site name. Uses Mozilla's Readability algorithm to remove ads, navigation, footers and non-essential elements while preserving the core content structure.",
    inputSchema: {
      type: "object",
      properties: { url: urlProperty },
      required: ["url"],
    },
  };
  return { tools: [parseTool] };
});
// Handle tool execution.
//
// Only the "parse" tool is known. Request-level problems (unknown tool,
// missing or non-string `url`) are thrown as McpError; failures while
// fetching or parsing the page are returned as a tool result flagged with
// `isError: true`, carrying the error message as text.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  if (name !== "parse") {
    throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
  }
  if (!args?.url) {
    throw new McpError(ErrorCode.InvalidParams, "URL is required");
  }
  // The advertised input schema declares `url` as a string; reject anything
  // else up front instead of letting it fail later as an opaque fetch error.
  if (typeof args.url !== "string") {
    throw new McpError(ErrorCode.InvalidParams, "URL must be a string");
  }

  try {
    const result = await parser.fetchAndParse(args.url);
    // The result is delivered as one pretty-printed JSON text item.
    return {
      content: [{
        type: "text",
        text: JSON.stringify({
          title: result.title,
          content: result.content,
          metadata: {
            excerpt: result.excerpt,
            byline: result.byline,
            siteName: result.siteName
          }
        }, null, 2)
      }]
    };
  } catch (error) {
    return {
      isError: true,
      content: [{
        type: "text",
        text: `Error: ${error.message}`
      }]
    };
  }
});
// Start the server on an HTTP transport bound to port 7860; if connecting
// fails, log the reason and exit with a non-zero status.
// NOTE(review): confirm the installed SDK version actually exports
// HttpServerTransport from server/http.js — StdioServerTransport is imported
// by this file but never used.
const transport = new HttpServerTransport({ port: 7860 });
server.connect(transport).catch((startupError) => {
  console.error(`Server failed to start: ${startupError.message}`);
  process.exit(1);
});
|