File size: 3,446 Bytes
8ce48d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from mcp.server import Server
from mcp.server.fastmcp import FastMCP, Context
from mcp.server.sse import SseServerTransport
from starlette.applications import Starlette
from starlette.requests import Request
from starlette.routing import Mount, Route
from pdf2zh import translate_stream
from pdf2zh.doclayout import ModelInstance
from pathlib import Path

import contextlib
import io
import os


def create_mcp_app() -> FastMCP:
    mcp = FastMCP("pdf2zh")

    @mcp.tool()
    async def translate_pdf(
        file: str, lang_in: str, lang_out: str, ctx: Context
    ) -> str:
        """
        translate given pdf. Argument `file` is absolute path of input pdf,
        `lang_in` and `lang_out` is translate from and to language, and
        should be like google translate lang_code. `lang_in` can be `auto`
        if you can't determine input language.
        """

        with open(file, "rb") as f:
            file_bytes = f.read()
        await ctx.log(level="info", message=f"start translate {file}")
        with contextlib.redirect_stdout(io.StringIO()):
            doc_mono_bytes, doc_dual_bytes = translate_stream(
                file_bytes,
                lang_in=lang_in,
                lang_out=lang_out,
                service="google",
                model=ModelInstance.value,
                thread=4,
            )
        await ctx.log(level="info", message="translate complete")
        output_path = Path(os.path.dirname(file))
        filename = os.path.splitext(os.path.basename(file))[0]
        doc_mono = output_path / f"{filename}-mono.pdf"
        doc_dual = output_path / f"{filename}-dual.pdf"
        with open(doc_mono, "wb") as f:
            f.write(doc_mono_bytes)
        with open(doc_dual, "wb") as f:
            f.write(doc_dual_bytes)
        return f"""------------
    translate complete
    mono pdf file: {doc_mono.absolute()}
    dual pdf file: {doc_dual.absolute()}
    """

    return mcp


def create_starlette_app(mcp_server: Server, *, debug: bool = False) -> Starlette:
    sse = SseServerTransport("/messages/")

    async def handle_sse(request: Request) -> None:
        async with sse.connect_sse(request.scope, request.receive, request._send) as (
            read_stream,
            write_stream,
        ):
            await mcp_server.run(
                read_stream, write_stream, mcp_server.create_initialization_options()
            )

    return Starlette(
        debug=debug,
        routes=[
            Route("/sse", endpoint=handle_sse),
            Mount("/messages/", app=sse.handle_post_message),
        ],
    )


if __name__ == "__main__":
    import argparse

    mcp = create_mcp_app()
    mcp_server = mcp._mcp_server
    parser = argparse.ArgumentParser(description="Run MCP SSE-based PDF2ZH server")

    parser.add_argument(
        "--sse",
        default=False,
        action="store_true",
        help="Run the server with SSE transport or STDIO",
    )
    parser.add_argument(
        "--host", type=str, default="127.0.0.1", required=False, help="Host to bind"
    )
    parser.add_argument(
        "--port", type=int, default=3001, required=False, help="Port to bind"
    )

    args = parser.parse_args()
    if args.sse and args.host and args.port:
        import uvicorn

        starlette_app = create_starlette_app(mcp_server, debug=True)
        uvicorn.run(starlette_app, host=args.host, port=args.port)
    else:
        mcp.run()