Spaces:

OrganizedProgrammers
/

MCPGroqChat

Sleeping

File size: 7,335 Bytes

import sys
import traceback
from typing import Any, List, Literal, Dict, Optional

import httpx
from mcp.server.fastmcp import FastMCP

# Initialize FastMCP server; the tool functions in this file register on it
# through the @mcp.tool() decorator.
mcp = FastMCP("patent-problematic-generator-helper")

# Base URLs of the remote HTTP APIs the tools call
ARXIV_BASE = "https://om4r932-arxiv.hf.space"  # used with the /search endpoint
DUCKDUCKGO_BASE = "https://ychkhan-ptt-endpoints.hf.space"  # used with the /search endpoint
DOC3GPPFINDER_BASE = "https://organizedprogrammers-3gppdocfinder.hf.space"  # used with /find, /search-spec and /batch

# Request function

async def post_data_to_api(url, data = None):
    """POST ``data`` as a JSON body to ``url`` and return the decoded JSON reply.

    Args:
        url: Full URL of the endpoint to call.
        data: JSON-serialisable payload. ``None`` or ``{}`` skips the request.

    Returns:
        The decoded JSON response on success.
        ``(None, "")`` when no payload was supplied (no request is made).
        ``(None, exception)`` when the request, the HTTP status check or the
        JSON decoding fails; the traceback is printed to stderr.
    """
    if data is None or data == {}:
        return (None, "")
    headers = {"Accept": "application/json"}
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as-is to preserve behaviour; confirm it is really required.
    async with httpx.AsyncClient(verify=False, timeout=180) as client:
        try:
            response = await client.post(url, headers=headers, json=data)
            # Diagnostics must go to stderr: this server is run with the
            # stdio transport, so stdout is reserved for protocol messages.
            print(response, file=sys.stderr)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Boundary handler: callers expect the (None, error) sentinel
            # rather than an exception.
            traceback.print_exception(e)
            return (None, e)
        
async def fake_post_data_to_api(url, params = None):
    """POST to ``url`` with ``params`` sent as query-string parameters (no body).

    Args:
        url: Full URL of the endpoint to call.
        params: Mapping of query parameters. ``None`` or ``{}`` skips the
            request.

    Returns:
        The decoded JSON response on success.
        ``(None, "")`` when no parameters were supplied (no request is made).
        ``(None, exception)`` when the request, the HTTP status check or the
        JSON decoding fails; the traceback is printed to stderr.
    """
    if params is None or params == {}:
        return (None, "")
    headers = {"Accept": "application/json"}
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as-is to preserve behaviour; confirm it is really required.
    async with httpx.AsyncClient(verify=False, timeout=180) as client:
        try:
            response = await client.post(url, headers=headers, params=params)
            # Diagnostics must go to stderr: this server is run with the
            # stdio transport, so stdout is reserved for protocol messages.
            print(response, file=sys.stderr)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Boundary handler: callers expect the (None, error) sentinel
            # rather than an exception.
            traceback.print_exception(e)
            return (None, e)

async def get_data_from_api(url):
    """GET ``url`` and return the decoded JSON reply.

    Args:
        url: Full URL of the endpoint to call.

    Returns:
        The decoded JSON response on success, or ``(None, exception)`` when
        the request, the HTTP status check or the JSON decoding fails; the
        traceback is printed to stderr.
    """
    headers = {"Accept": "application/json"}
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as-is to preserve behaviour; confirm it is really required.
    async with httpx.AsyncClient(verify=False, timeout=180) as client:
        try:
            response = await client.get(url, headers=headers)
            # Diagnostics must go to stderr: this server is run with the
            # stdio transport, so stdout is reserved for protocol messages.
            print(response, file=sys.stderr)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Boundary handler: callers expect the (None, error) sentinel
            # rather than an exception.
            traceback.print_exception(e)
            return (None, e)

# Tools

# arXiv

@mcp.tool()
async def get_arxiv_publications(keywords: str, limit: int = 5):
    """
    Search arXiv publications based on keywords and a limit of documents printed
    Arguments available: keywords: string [mandatory], limit: integer [optional, default = 5]
    Returns a text listing of the publications found (ID, title, authors, date, abstract, PDF URL), or an error / "nothing found" message
    """
    endpoint = ARXIV_BASE + "/search"
    data = await post_data_to_api(endpoint, {"keyword": keywords, "limit": limit})
    # The request helper reports failure as a (None, error) tuple.
    if isinstance(data, tuple) and data[0] is None:
        return f"An error has occured while getting publications: {data[1]}"
    # .get() so a reply without an "error" flag is not turned into a KeyError.
    if data.get("error"):
        return data.get("message", "An error has occured while getting publications")

    # On success "message" is iterated as a mapping of publication ID -> metadata
    # (presumably a dict keyed by arXiv ID; confirm against the API).
    results = data.get("message")
    if not results:
        return "No publications has been found"

    output = [
        f"arXiv pub ID: {pub}\nTitle: {metadata['title']}\nAuthors: {metadata['authors']}\nPublished on: {metadata['date']}\nAbstract: {metadata['abstract']}\nPDF URL: {metadata['pdf']}\n"
        for pub, metadata in results.items()
    ]

    return "-\n".join(output)

# 3GPP Doc Finder

@mcp.tool()
async def get_document_url(doc_id: str, release: Optional[int] = None):
    """
    Find 3GPP document (TSG docs, specifications or workshop files) only by their ID [note that it will only work with document ID] (and release if it's a specification only) and return their position via a URL (and a scope if it's a specification)
    Arguments available: doc_id: string [mandatory], release: integer [optional for every case]
    """
    endpoint = DOC3GPPFINDER_BASE + "/find"
    data = await post_data_to_api(endpoint, {"doc_id": doc_id, "release": release})
    # The request helper reports failure as a (None, error) tuple.
    if isinstance(data, tuple) and data[0] is None:
        return f"An error while searching publications: {data[1]}"
    output = f'The document {doc_id} is available via this URL : {data.get("url", "No URL found !")}. '
    # Only mention the scope when there is one: a missing, None or empty
    # scope must not produce a dangling "The scope of the document:" line.
    scope = data.get("scope")
    if scope:
        output += f'\nThe scope of the document: {scope}'
    return output
    
@mcp.tool()
async def search_specs(keywords: str, limit: int = 5, release: Optional[str] = None, wg: Optional[str] = None, spec_type: Optional[str] = None, mode: str = "and"):
    """
    Search 3GPP specifications only by their keywords [note that it will only work with keywords](and some filters [see kwargs field])
    Arguments available: keywords: string [mandatory, separated by space], limit [optional, default = 5], release: string [optional, filter] (the release version (e.g. 18, 9, 19, ...), generally the first number of the full version), wg: string [optional, filter] = working group (S1, C4, SP, ...), spec_type: string [optional, filter] (either TS or TR), mode [optional, default = 'and'] = search mode (and = all keywords must be in the search, or = at least one keyword in the search)
    """
    endpoint = DOC3GPPFINDER_BASE + "/search-spec"
    body = {
        "keywords": keywords,
        "mode": mode
    }

    # Optional filters are only sent when the caller provided them.
    if release is not None:
        body['release'] = release
    if wg is not None:
        body['wg'] = wg
    if spec_type is not None:
        body['spec_type'] = spec_type

    data = await post_data_to_api(endpoint, body)
    # The request helper reports failure as a (None, error) tuple.
    if isinstance(data, tuple) and data[0] is None:
        return f"An error has occured while searching specifications: {data[1]}"
    # Keep at most `limit` results. Slicing already copes with short lists;
    # max() guards against a negative limit silently trimming from the end.
    results = data.get('results', [])[:max(limit, 0)]
    output = []
    for spec in results:
        x = f"Found specification number {spec['id']} version {spec['release']}"
        # A missing, None or empty scope is simply left out.
        scope = spec.get('scope')
        if scope:
            x += f" where {scope.lower()}\n"
        else:
            x += "\n"
        output.append(x)
    return "-\n".join(output)

@mcp.tool()
async def get_multiple_documents_url(doc_ids: List[str], release: int = None):
    """
    [BATCH] Look up several 3GPP documents at once (TSG docs, specifications or workshop files) [note that it will only work with document ID] (plus a release, for specifications only) and return only the URL where each one is located
    Arguments available: doc_ids: list of string [mandatory], release: integer [optional for every case]
    """
    # The release filter is only part of the payload when it was given.
    if release is None:
        payload = {"doc_ids": doc_ids}
    else:
        payload = {"doc_ids": doc_ids, "release": release}

    reply = await post_data_to_api(DOC3GPPFINDER_BASE + "/batch", payload)

    # A (None, error) tuple is how the request helper signals a failure.
    request_failed = isinstance(reply, tuple) and reply[0] is None
    if request_failed:
        return f"An error while searching publications: {reply[1]}"

    # One line per document, in the order of the "results" mapping.
    lines = [
        f'The document {found_id} is available via this URL: {location}\n'
        for found_id, location in reply["results"].items()
    ]
    return "-\n".join(lines)

# PTT Endpoints

@mcp.tool()
async def search_documents_web(query: str, data_type: str = "web", limit: int = 5):
    """
    Search on the Web (thanks to DuckDuckGo) documents based on the user's query
    Arguments available: query: string [mandatory], data_type: string [optional, either 'pdf', 'patent' or 'web', default = 'web'], limit: integer [optional, default = 5]
    """
    endpoint = DUCKDUCKGO_BASE + "/search"
    # This endpoint is called with query-string parameters rather than a JSON body.
    data = await fake_post_data_to_api(endpoint, {"query": query, "data_type": data_type, 'max_references': limit})
    # The request helper reports failure as a (None, error) tuple.
    if isinstance(data, tuple) and data[0] is None:
        return f"An error while searching publications: {data[1]}"
    results = data["results"]
    # Each entry ends with a newline so the "-\n" separator sits on its own
    # line instead of being glued to the end of the preceding URL.
    output = [
        f"Title: {ref['title']}\nBody: {ref['body']}\nURL: {ref['url']}\n"
        for ref in results
    ]
    return "-\n".join(output)

# When executed as a script, start the MCP server using the stdio transport.
if __name__ == "__main__":
    mcp.run(transport="stdio")