Spaces:

OrganizedProgrammers
/

MCPSynapseChat

Sleeping

App Files Files Community

Omar ID EL MOUMEN committed on May 2

Commit

6c4ff0e

1 Parent(s): 8227e25

Add new version of MCP server

Browse files

Files changed (1) hide show

server.py +119 -102

server.py CHANGED Viewed

@@ -1,24 +1,23 @@
-from typing import Any, Literal
 import httpx
 import traceback
 from mcp.server.fastmcp import FastMCP
 # Initialize FastMCP server
-mcp = FastMCP("arxiv-omar")
-# Constants
-CUSTOM_ARXIV_API_BASE = "https://om4r932-arxiv.hf.space"
-DDG_API_BASE = "https://ychkhan-ptt-endpoints.hf.space"
-API_3GPP_BASE = "https://organizedprogrammers-3gppdocfinder.hf.space"
-# Helpers
-async def make_request(url: str, data: dict = None) -> dict[str, Any] | None:
-    if data is None:
-        return None
-    headers = {
-        "Accept": "application/json"
-    }
-    async with httpx.AsyncClient(verify=False) as client:
         try:
             response = await client.post(url, headers=headers, json=data)
             print(response)
@@ -26,118 +25,136 @@ async def make_request(url: str, data: dict = None) -> dict[str, Any] | None:
             return response.json()
         except Exception as e:
             traceback.print_exception(e)
-            return None
-def format_search(pub_id: str, content: dict) -> str:
-    return f"""
-        arXiv publication ID : {pub_id}
-        Title : {content["title"]}
-        Authors : {content["authors"]}
-        Release Date : {content["date"]}
-        Abstract : {content["abstract"]}
-        PDF link : {content["pdf"]}
-    """
-def format_extract(message: dict) -> str:
-    return f"""
-        Title of PDF : {message.get("title", "No title has been found")}
-        Text : {message.get("text", "No text !")}
-    """
-def format_result_search(page: dict) -> str:
-    return f"""
-        Title : {page.get("title", "No titles found !")}
-        Little description : {page.get("body", "No description")}
-        PDF url : {page.get("url", None)}
-    """
-def format_3gpp_doc_result(result: dict, release: int = None) -> str:
-    return f"""
-        Document ID : {result.get("doc_id")}
-        Release version : {release if release is not None else "Not specified"}
-        URL : {result.get("url", "No URL found !")}
-    """
-# Tools
 @mcp.tool()
-async def get_publications(keyword: str, limit: int = 15) -> str:
     """
-    Get arXiv publications based on keywords and limit of documents
-    Args:
-        keyword: Keywords separated by spaces
-        limit: Numbers of maximum publications returned (by default, 15)
     """
-    url = f"{CUSTOM_ARXIV_API_BASE}/search"
-    data = await make_request(url, data={'keyword': keyword, 'limit': limit})
     if data["error"]:
         return data["message"]
-    if not data:
-        return "Unable to fetch publications"
-    if len(data["message"].keys()) == 0:
-        return "No publications found"
-    publications = [format_search(pub_id, content) for (pub_id, content) in data["message"].items()]
-    return "\n--\n".join(publications)
 @mcp.tool()
-async def web_pdf_search(query: str) -> str:
     """
-    Search on the Web (with DuckDuckGo search engine) to get PDF documents based on the keywords
-    Args:
-        query: Keywords to search documents on the Web
     """
-    url = f"{DDG_API_BASE}/search"
-    data = await make_request(url, data={"query": query})
-    if not data:
-        return "Unable to fetch results"
-    if len(data["results"]) == 0:
-        return "No results found"
-    results = [format_result_search(result) for result in data["results"]]
-    return "\n--\n".join(results)
 @mcp.tool()
-async def get_3gpp_doc_url_byID(doc_id: str, release: int = None):
     """
-    Get 3GPP Technical Document URL by their document ID.
-    Args:
-        doc_id: Document ID (i.e. C4-125411, SP-551242, 31.101)
-        release : The release version of the document (by default, None)
     """
-    url = f"{API_3GPP_BASE}/find"
-    data = await make_request(url, data={"doc_id": doc_id, "release": release})
-    if not data:
-        return "Unable to search document in 3GPP"
-    return format_3gpp_doc_result(data, release)
 @mcp.tool()
-async def get_pdf_text(pdf_url: str, limit_page: int = -1) -> str:
     """
-    Extract the text from the URL pointing to a PDF file
-    Args:
-        pdf_url: URL to a PDF document
-        limit_page: How many pages the user wants to extract the content (default: -1 for all pages)
     """
-    url = f"{CUSTOM_ARXIV_API_BASE}/extract_pdf/url"
-    data = {"url": pdf_url}
-    if limit_page != -1:
-        data["page_num"] = limit_page
-    data = await make_request(url, data=data)
-    if data["error"]:
-        return data["message"]
-    if not data:
-        return "Unable to extract PDF text"
-    if len(data["message"].keys()) == 0:
-        return "No text can be extracted from this PDF"
-    return format_extract(data["message"])
 if __name__ == "__main__":
     mcp.run(transport="stdio")

+from typing import Any, List, Literal, Dict, Optional
 import httpx
 import traceback
 from mcp.server.fastmcp import FastMCP
 # Initialize FastMCP server
+# Server instance; the @mcp.tool() decorators below register each tool on it
+mcp = FastMCP("patent-problematic-generator-helper")
+# Base URLs of the remote HTTP APIs called by the tools below
+# arXiv search API
+ARXIV_BASE = "https://om4r932-arxiv.hf.space"
+# Web search API (the web search tool describes it as DuckDuckGo-backed)
+DUCKDUCKGO_BASE = "https://ychkhan-ptt-endpoints.hf.space"
+# 3GPP document finder API (/find, /search-spec and /batch are used below)
+DOC3GPPFINDER_BASE = "https://organizedprogrammers-3gppdocfinder.hf.space"
+# Request function
+async def post_data_to_api(url, data = None):
+    if data is None or data == {}:
+        return (None, "")
+    headers = {"Accept": "application/json"}
+    async with httpx.AsyncClient(verify=False, timeout=180) as client:
         try:
             response = await client.post(url, headers=headers, json=data)
             print(response)
             return response.json()
         except Exception as e:
             traceback.print_exception(e)
+            return (None, e)
+async def fake_post_data_to_api(url, params = None):
+    if params is None or params == {}:
+        return (None, "")
+    headers = {"Accept": "application/json"}
+    async with httpx.AsyncClient(verify=False, timeout=180) as client:
+        try:
+            response = await client.post(url, headers=headers, params=params)
+            print(response)
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            traceback.print_exception(e)
+            return (None, e)
+async def get_data_from_api(url):
+    headers = {"Accept": "application/json"}
+    async with httpx.AsyncClient(verify=False, timeout=180) as client:
+        try:
+            response = await client.get(url, headers=headers)
+            print(response)
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            traceback.print_exception(e)
+            return (None, e)
+# Tools
+# arXiv
 @mcp.tool()
+async def get_arxiv_publications(keywords: str, limit: int):
     """
+    Search arXiv publications based on keywords and a limit of documents printed
+    Arguments available: keywords: string [mandatory], limit: integer [mandatory, default = 5]
     """
+    endpoint = ARXIV_BASE + "/search"
+    data = await post_data_to_api(endpoint, {"keyword": keywords, "limit": limit})
+    if isinstance(data, tuple) and data[0] is None:
+        return f"An error has occured while getting publications: {data[1]}"
     if data["error"]:
         return data["message"]
+    if len(data) < 1:
+        return "No publications has been found"
+    results = data["message"]
+    output = []
+    for pub, metadata in results.items():
+        output.append(f"arXiv pub ID: {pub}\nTitle: {metadata['title']}\nAuthors: {metadata['authors']}\nPublished on: {metadata['date']}\nAbstract: {metadata['abstract']}\nPDF URL: {metadata['pdf']}\n")
+    return "-\n".join(output)
+# 3GPP Doc Finder
 @mcp.tool()
+async def get_document_url(doc_id: str, release: int = None):
     """
+    Find 3GPP document (TSG docs, specifications or workshop files) only by their ID [note that it will only work with keywords] (and release if it's a specification only) and return their position via a URL (and a scope if it's a specification)
+    Arguments available: doc_id: string [mandatory], release: integer [optional for every case]
     """
+    endpoint = DOC3GPPFINDER_BASE + "/find"
+    data = await post_data_to_api(endpoint, {"doc_id": doc_id, "release": release})
+    if isinstance(data, tuple) and data[0] is None:
+        return f"An error while searching publications: {data[1]}"
+    output = f'The document {doc_id} is available via this URL : {data.get("url", "No URL found !")}. '
+    output += f'\nThe scope of the document: {data["scope"]}' if data.get("scope", None) is not None or data.get("scope", None) == "" else ""
+    return output
 @mcp.tool()
+async def search_specs(keywords: str, limit: int, release: str = None, wg: str = None, spec_type: str = None, mode: str = "and"):
     """
+    Search 3GPP specifications only by their keywords [note that it will only work with keywords](and some filters [see kwargs field])
+    Arguments available: keywords: string [mandatory, separated by space], limit [mandatory, default = 5], release: string [optional, filter] (Rel-x where x is the release version, generally the first number of the full version), wg: string [optional, filter] = working group (S1, C4, SP, ...), spec_type: string [optional, filter] (either TS or TR), mode [mandatory, default = 'and'] = search mode (and = all keywords must be in the search, or = at least one keyword in the search)
     """
+    endpoint = DOC3GPPFINDER_BASE + "/search-spec"
+    data = await post_data_to_api(endpoint, {
+        "keywords": keywords,
+        "release": release,
+        "wg": wg,
+        "spec_type": spec_type,
+        "mode": mode
+    })
+    if isinstance(data, tuple) and data[0] is None:
+        return f"An error has occured while searching specifications"
+    results = data['results'][:min(len(data['results'])-1, limit)]
+    output = []
+    for spec in results:
+        x = f"Found specification number {spec['id']} version {spec['release']}"
+        if spec['scope'] != "":
+            x += f" where {spec['scope'].lower()}\n"
+        else:
+            x += "\n"
+        output.append(x)
+    return "-\n".join(output)
 @mcp.tool()
+async def get_multiple_documents_url(doc_ids: List[str], release: int = None):
     """
+    [BATCH] Search multiple 3GPP documents (TSG docs, specifications or workshop files) [note that it will only work with document ID] (and release if it's a specification only) and return only their position via a URL
+    Arguments available: doc_ids: list of string [mandatory], release: integer [optional for every case]
     """
+    endpoint = DOC3GPPFINDER_BASE + "/batch"
+    data = await post_data_to_api(endpoint, doc_ids=doc_ids, release=release)
+    if isinstance(data, tuple) and data[0] is None:
+        return f"An error while searching publications: {data[1]}"
+    results = data["results"]
+    output = []
+    for doc_id, url in results.items():
+        output.append(f'The document {doc_id} is available via this URL: {url}\n')
+    return "-\n".join(output)
+# PTT Endpoints
+@mcp.tool()
+async def search_documents_web(query: str, data_type: str = None, limit: int = 5):
+    """
+    Search on the Web (thanks to DuckDuckGo) documents based on the user's query
+    Arguments available: query: string [mandatory], data_type: string [optional, either 'pdf', 'patent' or None (classic web search)], limit: integer [optional, default = 5]
+    """
+    endpoint = DUCKDUCKGO_BASE + "/search"
+    data = await fake_post_data_to_api(endpoint, query=query, data_type=data_type, max_references=limit)
+    if isinstance(data, tuple) and data[0] is None:
+        return f"An error while searching publications: {data[1]}"
+    results = data["results"]
+    output = []
+    for ref in results:
+        output.append(f"Title: {ref['title']}\nBody: {ref['body']}\nURL: {ref['url']}")
+    return "-\n".join(output)
 if __name__ == "__main__":
+    # Start the server on the stdio transport when the file is run as a script
     mcp.run(transport="stdio")