Spaces:

prolapse
/

read

Sleeping

App Files Files Community

read / server.js

2ch's picture

2ch

Create server.js

a4421f0 verified 4 months ago

3.57 kB

	import { Server } from "@modelcontextprotocol/sdk/server/index.js";
	import { HttpServerTransport } from "@modelcontextprotocol/sdk/server/http.js";
	import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
	import { ErrorCode, McpError, ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
	import axios from 'axios';
	import { Readability } from '@mozilla/readability';
	import { JSDOM } from 'jsdom';
	import TurndownService from 'turndown';

	// Module-wide HTML -> Markdown converter, configured for ATX-style headings
	// and fenced code blocks.
	const turndownService = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });

	/**
	 * Downloads a web page, extracts its main article with Readability and
	 * converts the extracted HTML to Markdown.
	 */
	class WebsiteParser {
	  /**
	   * @param {object} [options]
	   * @param {number} [options.timeoutMs=30000] - Milliseconds to wait for the
	   *   HTTP response before the request is aborted.
	   */
	  constructor({ timeoutMs = 30000 } = {}) {
	    this.timeoutMs = timeoutMs;
	  }

	  /**
	   * Fetches `url` and returns its readable content.
	   *
	   * @param {string} url - Absolute http(s) URL of the page to parse.
	   * @returns {Promise<{title: *, content: string, excerpt: *, byline: *, siteName: *}>}
	   *   `content` is Markdown; the other fields are passed through from the
	   *   Readability result unchanged.
	   * @throws {Error} Always with the prefix "Failed to fetch or parse content: ".
	   *   The underlying error (invalid URL, unsupported protocol, network
	   *   failure, timeout, unparseable page) is attached as `cause`.
	   */
	  async fetchAndParse(url) {
	    try {
	      // The URL is supplied by the tool caller: validate it and refuse
	      // anything that is not plain HTTP(S) before touching the network.
	      const { protocol } = new URL(url);
	      if (protocol !== 'http:' && protocol !== 'https:') {
	        throw new Error(`Unsupported URL protocol: ${protocol}`);
	      }

	      // Fetch the webpage; the timeout keeps an unresponsive host from
	      // blocking the tool call forever.
	      const response = await axios.get(url, {
	        headers: {
	          'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)'
	        },
	        timeout: this.timeoutMs
	      });

	      // Create a DOM from the HTML; `url` is handed to JSDOM as the document URL
	      const dom = new JSDOM(response.data, { url });
	      const document = dom.window.document;

	      // Use Readability to extract main content
	      const reader = new Readability(document);
	      const article = reader.parse();

	      if (!article) {
	        throw new Error('Failed to parse content');
	      }

	      // Convert HTML to Markdown
	      const markdown = turndownService.turndown(article.content);

	      return {
	        title: article.title,
	        content: markdown,
	        excerpt: article.excerpt,
	        byline: article.byline,
	        siteName: article.siteName
	      };
	    } catch (error) {
	      // Keep the original error reachable for debugging instead of
	      // flattening it to a message string only.
	      throw new Error(`Failed to fetch or parse content: ${error.message}`, { cause: error });
	    }
	  }
	}

	// MCP server instance: identifies itself by name/version and declares
	// (empty) tool capabilities.
	const serverInfo = { name: "server-readability-parser", version: "1.0.0" };
	const serverOptions = { capabilities: { tools: {} } };
	const server = new Server(serverInfo, serverOptions);

	// Single parser instance used by the tool-call handler.
	const parser = new WebsiteParser();

	// Tool listing: the server exposes exactly one tool, "parse", which takes a
	// required string `url`.
	server.setRequestHandler(ListToolsRequestSchema, async () => {
	  const urlProperty = {
	    type: "string",
	    description: "The website URL to parse"
	  };

	  const parseTool = {
	    name: "parse",
	    description: "Extracts and transforms webpage content into clean, LLM-optimized Markdown. Returns article title, main content, excerpt, byline and site name. Uses Mozilla's Readability algorithm to remove ads, navigation, footers and non-essential elements while preserving the core content structure.",
	    inputSchema: {
	      type: "object",
	      properties: { url: urlProperty },
	      required: ["url"]
	    }
	  };

	  return { tools: [parseTool] };
	});

	// Tool execution: validates the request, runs the parser and returns the
	// result as pretty-printed JSON text. Parser failures are reported as an
	// `isError` tool result rather than thrown.
	server.setRequestHandler(CallToolRequestSchema, async (request) => {
	  const toolName = request.params.name;
	  const toolArgs = request.params.arguments;

	  // Only the "parse" tool exists.
	  if (toolName !== "parse") {
	    throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${toolName}`);
	  }

	  const targetUrl = toolArgs?.url;
	  if (!targetUrl) {
	    throw new McpError(ErrorCode.InvalidParams, "URL is required");
	  }

	  try {
	    const article = await parser.fetchAndParse(targetUrl);

	    // Group the secondary fields under `metadata`.
	    const payload = {
	      title: article.title,
	      content: article.content,
	      metadata: {
	        excerpt: article.excerpt,
	        byline: article.byline,
	        siteName: article.siteName
	      }
	    };

	    return {
	      content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
	    };
	  } catch (error) {
	    return {
	      isError: true,
	      content: [{ type: "text", text: `Error: ${error.message}` }]
	    };
	  }
	});

	// Start the server on an HTTP transport bound to port 7860.
	// NOTE(review): confirm that the installed @modelcontextprotocol/sdk version
	// really exports HttpServerTransport from server/http.js — this cannot be
	// verified from this file.
	const transport = new HttpServerTransport({ port: 7860 });

	// Log the start-up failure and terminate with a non-zero exit code.
	const handleStartupFailure = (error) => {
	  console.error(`Server failed to start: ${error.message}`);
	  process.exit(1);
	};

	server.connect(transport).catch(handleStartupFailure);