|
import { Server } from "@modelcontextprotocol/sdk/server/index.js"; |
|
import { HttpServerTransport } from "@modelcontextprotocol/sdk/server/http.js"; |
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; |
|
import { ErrorCode, McpError, ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js"; |
|
import axios from 'axios'; |
|
import { Readability } from '@mozilla/readability'; |
|
import { JSDOM } from 'jsdom'; |
|
import TurndownService from 'turndown'; |
|
|
|
|
|
// Module-wide HTML-to-Markdown converter, shared by every parse request.
// Configured for ATX ("#") headings and fenced code blocks.
const turndownService = new TurndownService({ codeBlockStyle: 'fenced', headingStyle: 'atx' });
|
|
|
/**
 * Downloads a web page and reduces it to its main article content as Markdown.
 *
 * Pipeline: axios fetches the HTML, jsdom builds a DOM from it, Readability
 * extracts the article, and the module-level `turndownService` converts the
 * article HTML to Markdown.
 */
class WebsiteParser {
  /**
   * @param {object} [options]
   * @param {number} [options.timeoutMs=30000] - Milliseconds to wait for the
   *   HTTP request before aborting it. axios applies no timeout by default,
   *   which would let a stalled host block a tool call indefinitely.
   */
  constructor({ timeoutMs = 30000 } = {}) {
    this.timeoutMs = timeoutMs;
  }

  /**
   * Fetches `url` and returns the extracted article.
   *
   * @param {string} url - Absolute http(s) URL of the page to parse.
   * @returns {Promise<{title: string, content: string, excerpt: string, byline: string, siteName: string}>}
   *   Article fields as reported by Readability; `content` is Markdown.
   * @throws {Error} Always an `Error` whose message starts with
   *   "Failed to fetch or parse content: "; the underlying failure is
   *   attached as `cause`.
   */
  async fetchAndParse(url) {
    let dom;

    try {
      // The URL comes from a tool caller, so validate it before any network
      // access and only allow the schemes a web page can be served over.
      const target = new URL(url);
      if (target.protocol !== 'http:' && target.protocol !== 'https:') {
        throw new Error(`Unsupported URL protocol: ${target.protocol}`);
      }

      const response = await axios.get(target.href, {
        timeout: this.timeoutMs,
        headers: {
          'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)',
        },
      });

      // Passing the page URL lets jsdom resolve relative links in the document.
      dom = new JSDOM(response.data, { url: target.href });

      const article = new Readability(dom.window.document).parse();
      if (!article) {
        throw new Error('Failed to parse content');
      }

      return {
        title: article.title,
        content: turndownService.turndown(article.content),
        excerpt: article.excerpt,
        byline: article.byline,
        siteName: article.siteName,
      };
    } catch (error) {
      // Keep the original error (and its stack) reachable through `cause`.
      throw new Error(`Failed to fetch or parse content: ${error.message}`, { cause: error });
    } finally {
      // Release the jsdom window once the article has been extracted.
      dom?.window.close();
    }
  }
}
|
|
|
|
|
// MCP server instance: identifies itself by name/version and advertises the
// "tools" capability (with no extra capability options).
const server = new Server(
  { name: "server-readability-parser", version: "1.0.0" },
  { capabilities: { tools: {} } },
);

// Single parser instance reused by every tool call.
const parser = new WebsiteParser();
|
|
|
|
|
// Tool discovery: reports the single "parse" tool together with the JSON
// schema of its arguments (one required string property, `url`).
server.setRequestHandler(ListToolsRequestSchema, async () => {
  const parseToolInput = {
    type: "object",
    properties: {
      url: { type: "string", description: "The website URL to parse" },
    },
    required: ["url"],
  };

  const parseTool = {
    name: "parse",
    description: "Extracts and transforms webpage content into clean, LLM-optimized Markdown. Returns article title, main content, excerpt, byline and site name. Uses Mozilla's Readability algorithm to remove ads, navigation, footers and non-essential elements while preserving the core content structure.",
    inputSchema: parseToolInput,
  };

  return { tools: [parseTool] };
});
|
|
|
|
|
// Tool invocation: runs the "parse" tool.
//
// Protocol-level problems (unknown tool, missing or malformed `url` argument)
// are thrown as McpError. Failures while fetching or parsing the page are
// returned as a normal tool result flagged with `isError: true`.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  if (name !== "parse") {
    throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
  }

  const url = args?.url;
  if (!url) {
    throw new McpError(ErrorCode.InvalidParams, "URL is required");
  }
  // The input schema declares `url` as a string; reject anything else here
  // instead of letting it surface later as an opaque fetch failure.
  if (typeof url !== "string" || url.trim() === "") {
    throw new McpError(ErrorCode.InvalidParams, "URL must be a non-empty string");
  }

  try {
    const { title, content, excerpt, byline, siteName } = await parser.fetchAndParse(url);

    // The result is serialized as pretty-printed JSON inside one text item.
    const payload = {
      title,
      content,
      metadata: { excerpt, byline, siteName },
    };

    return {
      content: [{ type: "text", text: JSON.stringify(payload, null, 2) }],
    };
  } catch (error) {
    // Fall back to String() in case something other than an Error was thrown.
    const reason = error instanceof Error ? error.message : String(error);

    return {
      isError: true,
      content: [{ type: "text", text: `Error: ${reason}` }],
    };
  }
});
|
|
|
|
|
// HTTP transport the server listens on (port 7860).
// NOTE(review): confirm that the installed @modelcontextprotocol/sdk version
// actually exports HttpServerTransport from "server/http.js".
const transport = new HttpServerTransport({ port: 7860 });

/**
 * Logs why the server could not start and terminates the process with exit
 * code 1.
 *
 * @param {Error} startupError - Rejection reason from `server.connect`.
 */
function exitOnStartupFailure(startupError) {
  console.error(`Server failed to start: ${startupError.message}`);
  process.exit(1);
}

server.connect(transport).catch(exitOnStartupFailure);
|
|