Spaces:
Sleeping
Sleeping
File size: 4,588 Bytes
8227e25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import sys
import traceback
from typing import Any, Literal

import httpx
from mcp.server.fastmcp import FastMCP
# Initialize FastMCP server; the tool functions in this file register on it via @mcp.tool()
mcp = FastMCP("arxiv-omar")
# Constants: base URLs of the remote HTTP services the tools send POST requests to
# Base for the arXiv search ("/search") and PDF text extraction ("/extract_pdf/url") endpoints
CUSTOM_ARXIV_API_BASE = "https://om4r932-arxiv.hf.space"
# Base for the web PDF search ("/search") endpoint
DDG_API_BASE = "https://ychkhan-ptt-endpoints.hf.space"
# Base for the 3GPP document lookup ("/find") endpoint
API_3GPP_BASE = "https://organizedprogrammers-3gppdocfinder.hf.space"
# Helpers
async def make_request(url: str, data: dict = None) -> dict[str, Any] | None:
if data is None:
return None
headers = {
"Accept": "application/json"
}
async with httpx.AsyncClient(verify=False) as client:
try:
response = await client.post(url, headers=headers, json=data)
print(response)
response.raise_for_status()
return response.json()
except Exception as e:
traceback.print_exception(e)
return None
def format_search(pub_id: str, content: dict) -> str:
    """Render one arXiv search hit as a labelled, multi-line text block.

    Args:
        pub_id: Identifier of the publication.
        content: Mapping that must provide the keys ``title``, ``authors``,
            ``date``, ``abstract`` and ``pdf``; a missing key raises KeyError.

    Returns:
        One ``label : value`` line per field, wrapped in a leading and a
        trailing newline.
    """
    rows = (
        ("arXiv publication ID", pub_id),
        ("Title", content["title"]),
        ("Authors", content["authors"]),
        ("Release Date", content["date"]),
        ("Abstract", content["abstract"]),
        ("PDF link", content["pdf"]),
    )
    body = "\n".join(f"{label} : {value}" for label, value in rows)
    return f"\n{body}\n"
def format_extract(message: dict) -> str:
    """Render extracted PDF content as a two-line text block.

    Args:
        message: Mapping read for the optional keys ``title`` and ``text``;
            a placeholder string is used for each key that is absent.

    Returns:
        The title line and the text line, wrapped in a leading and a
        trailing newline.
    """
    title = message.get("title", "No title has been found")
    text = message.get("text", "No text !")
    return "\n".join(("", f"Title of PDF : {title}", f"Text : {text}", ""))
def format_result_search(page: dict) -> str:
    """Render one web search result as a three-line text block.

    Args:
        page: Mapping read for the optional keys ``title``, ``body`` and
            ``url``. Missing title/body fall back to placeholder strings;
            a missing url is rendered as ``None``.

    Returns:
        The title, description and URL lines, wrapped in a leading and a
        trailing newline.
    """
    title = page.get("title", "No titles found !")
    description = page.get("body", "No description")
    link = page.get("url")
    rows = (
        f"Title : {title}",
        f"Little description : {description}",
        f"PDF url : {link}",
    )
    return "\n" + "\n".join(rows) + "\n"
def format_3gpp_doc_result(result: dict, release: int = None) -> str:
    """Render a 3GPP document lookup result as a three-line text block.

    Args:
        result: Mapping read for the optional keys ``doc_id`` and ``url``.
            A missing ``doc_id`` is rendered as ``None``; a missing ``url``
            falls back to a placeholder string.
        release: Release version to display; ``None`` is shown as
            "Not specified".

    Returns:
        The document ID, release and URL lines, wrapped in a leading and a
        trailing newline.
    """
    if release is None:
        shown_release = "Not specified"
    else:
        shown_release = release
    doc_id = result.get("doc_id")
    link = result.get("url", "No URL found !")
    rows = (
        f"Document ID : {doc_id}",
        f"Release version : {shown_release}",
        f"URL : {link}",
    )
    return "\n" + "\n".join(rows) + "\n"
# Tools
@mcp.tool()
async def get_publications(keyword: str, limit: int = 15) -> str:
    """
    Get arXiv publications based on keywords and limit of documents

    Args:
        keyword: Keywords separated by spaces
        limit: Numbers of maximum publications returned (by default, 15)

    Returns:
        The formatted publications separated by "--" lines, or a short
        message when the request fails, the service reports an error, or
        no publication matches.
    """
    url = f"{CUSTOM_ARXIV_API_BASE}/search"
    data = await make_request(url, data={"keyword": keyword, "limit": limit})

    # make_request returns None on any failure, so the response must be
    # checked before it is indexed.
    if not data:
        return "Unable to fetch publications"
    # The service signals failures with a truthy "error" flag and puts the
    # explanation in "message".
    if data.get("error"):
        return data.get("message", "Unable to fetch publications")

    # On success "message" maps publication IDs to their details.
    found = data.get("message")
    if not found:
        return "No publications found"

    publications = [format_search(pub_id, content) for pub_id, content in found.items()]
    return "\n--\n".join(publications)
@mcp.tool()
async def web_pdf_search(query: str) -> str:
    """
    Search on the Web (with DuckDuckGo search engine) to get PDF documents based on the keywords
    Args:
        query: Keywords to search documents on the Web
    """
    # POST the query to the search service; a falsy reply means the request
    # helper could not obtain a usable response.
    endpoint = f"{DDG_API_BASE}/search"
    reply = await make_request(endpoint, data={"query": query})
    if not reply:
        return "Unable to fetch results"

    hits = reply["results"]
    if len(hits) == 0:
        return "No results found"

    # One formatted block per hit, separated by a "--" line.
    return "\n--\n".join(format_result_search(hit) for hit in hits)
@mcp.tool()
async def get_3gpp_doc_url_byID(doc_id: str, release: int = None):
    """
    Get 3GPP Technical Document URL by their document ID.
    Args:
        doc_id: Document ID (i.e. C4-125411, SP-551242, 31.101)
        release : The release version of the document (by default, None)
    """
    # The release is forwarded as-is, including None when it was not given.
    endpoint = f"{API_3GPP_BASE}/find"
    payload = {"doc_id": doc_id, "release": release}
    reply = await make_request(endpoint, data=payload)

    # A falsy reply means the request helper could not obtain a usable response.
    if reply:
        return format_3gpp_doc_result(reply, release)
    return "Unable to search document in 3GPP"
@mcp.tool()
async def get_pdf_text(pdf_url: str, limit_page: int = -1) -> str:
    """
    Extract the text from the URL pointing to a PDF file

    Args:
        pdf_url: URL to a PDF document
        limit_page: How many pages the user wants to extract the content (default: -1 for all pages)

    Returns:
        The formatted title and text of the PDF, or a short message when the
        request fails, the service reports an error, or nothing could be
        extracted.
    """
    url = f"{CUSTOM_ARXIV_API_BASE}/extract_pdf/url"
    payload = {"url": pdf_url}
    # "page_num" is only sent when a page limit was requested.
    if limit_page != -1:
        payload["page_num"] = limit_page

    data = await make_request(url, data=payload)

    # make_request returns None on any failure, so the response must be
    # checked before it is indexed.
    if not data:
        return "Unable to extract PDF text"
    # The service signals failures with a truthy "error" flag and puts the
    # explanation in "message".
    if data.get("error"):
        return data.get("message", "Unable to extract PDF text")

    extracted = data.get("message")
    if not extracted:
        return "No text can be extracted from this PDF"
    return format_extract(extracted)
if __name__ == "__main__":
    # Serve the registered tools over the stdio transport when run as a script.
    mcp.run(transport="stdio")