Omar ID EL MOUMEN committed on
Commit
6c4ff0e
·
1 Parent(s): 8227e25

Add new version of MCP server

Browse files
Files changed (1) hide show
  1. server.py +119 -102
server.py CHANGED
@@ -1,24 +1,23 @@
1
- from typing import Any, Literal
2
  import httpx
3
  import traceback
4
  from mcp.server.fastmcp import FastMCP
5
 
6
  # Initialize FastMCP server
7
- mcp = FastMCP("arxiv-omar")
8
 
9
- # Constants
10
- CUSTOM_ARXIV_API_BASE = "https://om4r932-arxiv.hf.space"
11
- DDG_API_BASE = "https://ychkhan-ptt-endpoints.hf.space"
12
- API_3GPP_BASE = "https://organizedprogrammers-3gppdocfinder.hf.space"
13
 
14
- # Helpers
15
- async def make_request(url: str, data: dict = None) -> dict[str, Any] | None:
16
- if data is None:
17
- return None
18
- headers = {
19
- "Accept": "application/json"
20
- }
21
- async with httpx.AsyncClient(verify=False) as client:
22
  try:
23
  response = await client.post(url, headers=headers, json=data)
24
  print(response)
@@ -26,118 +25,136 @@ async def make_request(url: str, data: dict = None) -> dict[str, Any] | None:
26
  return response.json()
27
  except Exception as e:
28
  traceback.print_exception(e)
29
- return None
30
 
31
- def format_search(pub_id: str, content: dict) -> str:
32
- return f"""
33
- arXiv publication ID : {pub_id}
34
- Title : {content["title"]}
35
- Authors : {content["authors"]}
36
- Release Date : {content["date"]}
37
- Abstract : {content["abstract"]}
38
- PDF link : {content["pdf"]}
39
- """
 
 
 
 
40
 
41
- def format_extract(message: dict) -> str:
42
- return f"""
43
- Title of PDF : {message.get("title", "No title has been found")}
44
- Text : {message.get("text", "No text !")}
45
- """
 
 
 
 
 
 
46
 
47
- def format_result_search(page: dict) -> str:
48
- return f"""
49
- Title : {page.get("title", "No titles found !")}
50
- Little description : {page.get("body", "No description")}
51
- PDF url : {page.get("url", None)}
52
- """
53
 
54
- def format_3gpp_doc_result(result: dict, release: int = None) -> str:
55
- return f"""
56
- Document ID : {result.get("doc_id")}
57
- Release version : {release if release is not None else "Not specified"}
58
- URL : {result.get("url", "No URL found !")}
59
- """
60
 
61
- # Tools
62
  @mcp.tool()
63
- async def get_publications(keyword: str, limit: int = 15) -> str:
64
  """
65
- Get arXiv publications based on keywords and limit of documents
66
-
67
- Args:
68
- keyword: Keywords separated by spaces
69
- limit: Numbers of maximum publications returned (by default, 15)
70
  """
71
- url = f"{CUSTOM_ARXIV_API_BASE}/search"
72
- data = await make_request(url, data={'keyword': keyword, 'limit': limit})
 
 
73
  if data["error"]:
74
  return data["message"]
75
- if not data:
76
- return "Unable to fetch publications"
77
- if len(data["message"].keys()) == 0:
78
- return "No publications found"
79
 
80
- publications = [format_search(pub_id, content) for (pub_id, content) in data["message"].items()]
81
- return "\n--\n".join(publications)
 
 
 
 
 
 
82
 
83
  @mcp.tool()
84
- async def web_pdf_search(query: str) -> str:
85
  """
86
- Search on the Web (with DuckDuckGo search engine) to get PDF documents based on the keywords
87
-
88
- Args:
89
- query: Keywords to search documents on the Web
90
  """
 
 
 
 
 
 
 
91
 
92
- url = f"{DDG_API_BASE}/search"
93
- data = await make_request(url, data={"query": query})
94
- if not data:
95
- return "Unable to fetch results"
96
- if len(data["results"]) == 0:
97
- return "No results found"
98
-
99
- results = [format_result_search(result) for result in data["results"]]
100
- return "\n--\n".join(results)
101
-
102
  @mcp.tool()
103
- async def get_3gpp_doc_url_byID(doc_id: str, release: int = None):
104
  """
105
- Get 3GPP Technical Document URL by their document ID.
106
-
107
- Args:
108
- doc_id: Document ID (i.e. C4-125411, SP-551242, 31.101)
109
- release : The release version of the document (by default, None)
110
  """
111
- url = f"{API_3GPP_BASE}/find"
112
- data = await make_request(url, data={"doc_id": doc_id, "release": release})
113
- if not data:
114
- return "Unable to search document in 3GPP"
115
-
116
- return format_3gpp_doc_result(data, release)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  @mcp.tool()
119
- async def get_pdf_text(pdf_url: str, limit_page: int = -1) -> str:
120
  """
121
- Extract the text from the URL pointing to a PDF file
122
-
123
- Args:
124
- pdf_url: URL to a PDF document
125
- limit_page: How many pages the user wants to extract the content (default: -1 for all pages)
126
  """
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- url = f"{CUSTOM_ARXIV_API_BASE}/extract_pdf/url"
129
- data = {"url": pdf_url}
130
- if limit_page != -1:
131
- data["page_num"] = limit_page
132
- data = await make_request(url, data=data)
133
- if data["error"]:
134
- return data["message"]
135
- if not data:
136
- return "Unable to extract PDF text"
137
- if len(data["message"].keys()) == 0:
138
- return "No text can be extracted from this PDF"
139
-
140
- return format_extract(data["message"])
 
 
141
 
142
  if __name__ == "__main__":
143
  mcp.run(transport="stdio")
 
1
+ from typing import Any, List, Literal, Dict, Optional
2
  import httpx
3
  import traceback
4
  from mcp.server.fastmcp import FastMCP
5
 
# Create the FastMCP server instance on which the tools below are registered
mcp = FastMCP("patent-problematic-generator-helper")

# Base URLs of the remote HTTP services queried by the tools
ARXIV_BASE = "https://om4r932-arxiv.hf.space"
DUCKDUCKGO_BASE = "https://ychkhan-ptt-endpoints.hf.space"
DOC3GPPFINDER_BASE = "https://organizedprogrammers-3gppdocfinder.hf.space"
13
 
14
+ # Request function
15
+
async def post_data_to_api(url, data=None):
    """
    POST `data` as a JSON body to `url` and return the decoded JSON response.

    Args:
        url: Full URL of the endpoint to call.
        data: JSON-serialisable payload. When it is None or an empty dict,
            no request is sent.

    Returns:
        The decoded JSON payload on success.
        (None, "") when there is nothing to send.
        (None, exception) when the request, the status check or the JSON
        decoding raises; the traceback is printed to stderr.
    """
    if data is None or data == {}:
        return (None, "")

    # Local import so this function does not depend on a file-level `import sys`
    import sys

    headers = {"Accept": "application/json"}
    # NOTE(review): verify=False disables TLS certificate verification — confirm this is intended
    async with httpx.AsyncClient(verify=False, timeout=180) as client:
        try:
            response = await client.post(url, headers=headers, json=data)
            # The server is run with transport="stdio": stdout carries the
            # protocol messages, so diagnostics must go to stderr
            print(response, file=sys.stderr)
            # NOTE(review): status check assumed here, as in the sibling request helpers — confirm
            response.raise_for_status()
            return response.json()
        except Exception as e:
            traceback.print_exception(e)
            return (None, e)
29
 
async def fake_post_data_to_api(url, params=None):
    """
    Send a POST request whose arguments travel in the query string (not in a
    JSON body) and return the decoded JSON response.

    Args:
        url: Full URL of the endpoint to call.
        params: Mapping of query-string parameters. When it is None or an
            empty dict, no request is sent.

    Returns:
        The decoded JSON payload on success.
        (None, "") when there is nothing to send.
        (None, exception) when the request, the status check or the JSON
        decoding raises; the traceback is printed to stderr.
    """
    if params is None or params == {}:
        return (None, "")

    # Local import so this function does not depend on a file-level `import sys`
    import sys

    headers = {"Accept": "application/json"}
    # NOTE(review): verify=False disables TLS certificate verification — confirm this is intended
    async with httpx.AsyncClient(verify=False, timeout=180) as client:
        try:
            response = await client.post(url, headers=headers, params=params)
            # The server is run with transport="stdio": stdout carries the
            # protocol messages, so diagnostics must go to stderr
            print(response, file=sys.stderr)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            traceback.print_exception(e)
            return (None, e)
43
 
async def get_data_from_api(url):
    """
    Send a GET request to `url` and return the decoded JSON response.

    Args:
        url: Full URL of the endpoint to call.

    Returns:
        The decoded JSON payload on success.
        (None, exception) when the request, the status check or the JSON
        decoding raises; the traceback is printed to stderr.
    """
    # Local import so this function does not depend on a file-level `import sys`
    import sys

    headers = {"Accept": "application/json"}
    # NOTE(review): verify=False disables TLS certificate verification — confirm this is intended
    async with httpx.AsyncClient(verify=False, timeout=180) as client:
        try:
            response = await client.get(url, headers=headers)
            # The server is run with transport="stdio": stdout carries the
            # protocol messages, so diagnostics must go to stderr
            print(response, file=sys.stderr)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            traceback.print_exception(e)
            return (None, e)
55
 
56
+ # Tools
 
 
 
 
 
57
 
58
+ # arXiv
 
 
 
 
 
59
 
 
@mcp.tool()
async def get_arxiv_publications(keywords: str, limit: int = 5):
    """
    Search arXiv publications based on keywords and a limit of documents printed
    Arguments available: keywords: string [mandatory], limit: integer [optional, default = 5]
    """
    endpoint = ARXIV_BASE + "/search"
    data = await post_data_to_api(endpoint, {"keyword": keywords, "limit": limit})

    # The request helper reports a failure as a (None, reason) tuple
    if isinstance(data, tuple) and data[0] is None:
        return f"An error has occured while getting publications: {data[1]}"
    # The API flags its own failures with a truthy "error" field and puts the
    # explanation in "message"; a missing field is treated as "no error"
    if data.get("error"):
        return data["message"]

    # On success "message" maps each publication ID to its metadata
    results = data.get("message")
    if not results:
        return "No publications has been found"

    output = [
        f"arXiv pub ID: {pub}\nTitle: {metadata['title']}\nAuthors: {metadata['authors']}\nPublished on: {metadata['date']}\nAbstract: {metadata['abstract']}\nPDF URL: {metadata['pdf']}\n"
        for pub, metadata in results.items()
    ]
    return "-\n".join(output)
81
+
82
+ # 3GPP Doc Finder
83
 
@mcp.tool()
async def get_document_url(doc_id: str, release: int = None):
    """
    Find 3GPP document (TSG docs, specifications or workshop files) only by their ID [note that it will only work with keywords] (and release if it's a specification only) and return their position via a URL (and a scope if it's a specification)
    Arguments available: doc_id: string [mandatory], release: integer [optional for every case]
    """
    endpoint = DOC3GPPFINDER_BASE + "/find"
    data = await post_data_to_api(endpoint, {"doc_id": doc_id, "release": release})

    # The request helper reports a failure as a (None, reason) tuple
    if isinstance(data, tuple) and data[0] is None:
        return f"An error while searching publications: {data[1]}"

    output = f'The document {doc_id} is available via this URL : {data.get("url", "No URL found !")}. '
    # Only mention the scope when the API actually returned a non-empty one
    scope = data.get("scope")
    if scope is not None and scope != "":
        output += f'\nThe scope of the document: {scope}'
    return output
97
 
 
 
 
 
 
 
 
 
 
 
@mcp.tool()
async def search_specs(keywords: str, limit: int = 5, release: str = None, wg: str = None, spec_type: str = None, mode: str = "and"):
    """
    Search 3GPP specifications only by their keywords [note that it will only work with keywords](and some filters [see kwargs field])
    Arguments available: keywords: string [mandatory, separated by space], limit [optional, default = 5], release: string [optional, filter] (Rel-x where x is the release version, generally the first number of the full version), wg: string [optional, filter] = working group (S1, C4, SP, ...), spec_type: string [optional, filter] (either TS or TR), mode [mandatory, default = 'and'] = search mode (and = all keywords must be in the search, or = at least one keyword in the search)
    """
    endpoint = DOC3GPPFINDER_BASE + "/search-spec"
    data = await post_data_to_api(endpoint, {
        "keywords": keywords,
        "release": release,
        "wg": wg,
        "spec_type": spec_type,
        "mode": mode,
    })

    # The request helper reports a failure as a (None, reason) tuple
    if isinstance(data, tuple) and data[0] is None:
        return "An error has occured while searching specifications"

    # `limit` is applied locally: keep at most `limit` results, including the
    # last one when fewer than `limit` are returned
    results = data['results'][:max(limit, 0)]

    output = []
    for spec in results:
        line = f"Found specification number {spec['id']} version {spec['release']}"
        # A missing or null scope is treated like an empty one
        scope = spec.get('scope') or ""
        if scope != "":
            line += f" where {scope.lower()}\n"
        else:
            line += "\n"
        output.append(line)
    return "-\n".join(output)
124
 
@mcp.tool()
async def get_multiple_documents_url(doc_ids: List[str], release: int = None):
    """
    [BATCH] Search multiple 3GPP documents (TSG docs, specifications or workshop files) [note that it will only work with document ID] (and release if it's a specification only) and return only their position via a URL
    Arguments available: doc_ids: list of string [mandatory], release: integer [optional for every case]
    """
    endpoint = DOC3GPPFINDER_BASE + "/batch"
    # The request helper takes the payload as a single dict (url, data), not
    # as keyword arguments
    data = await post_data_to_api(endpoint, {"doc_ids": doc_ids, "release": release})

    # The request helper reports a failure as a (None, reason) tuple
    if isinstance(data, tuple) and data[0] is None:
        return f"An error while searching publications: {data[1]}"

    # "results" maps each document ID to its URL
    results = data["results"]
    output = [
        f'The document {doc_id} is available via this URL: {url}\n'
        for doc_id, url in results.items()
    ]
    return "-\n".join(output)
140
+
141
+ # PTT Endpoints
142
 
@mcp.tool()
async def search_documents_web(query: str, data_type: str = None, limit: int = 5):
    """
    Search on the Web (thanks to DuckDuckGo) documents based on the user's query
    Arguments available: query: string [mandatory], data_type: string [optional, either 'pdf', 'patent' or None (classic web search)], limit: integer [optional, default = 5]
    """
    endpoint = DUCKDUCKGO_BASE + "/search"
    # The request helper takes the query-string parameters as a single dict
    # (url, params), not as keyword arguments.
    # NOTE(review): unset optional filters are omitted from the query string;
    # this assumes the endpoint treats an absent parameter as None — confirm
    params = {
        name: value
        for name, value in {
            "query": query,
            "data_type": data_type,
            "max_references": limit,
        }.items()
        if value is not None
    }
    data = await fake_post_data_to_api(endpoint, params)

    # The request helper reports a failure as a (None, reason) tuple
    if isinstance(data, tuple) and data[0] is None:
        return f"An error while searching publications: {data[1]}"

    results = data["results"]
    # Each entry ends with a newline so the "-" separator sits on its own line
    # instead of being glued to the URL
    output = [
        f"Title: {ref['title']}\nBody: {ref['body']}\nURL: {ref['url']}\n"
        for ref in results
    ]
    return "-\n".join(output)
158
 
# Script entry point: serve the registered tools over the stdio transport
if __name__ == "__main__":
    mcp.run(transport="stdio")