mtyrrell committed on
Commit
a38e3e8
·
1 Parent(s): 08a352f

fix for mcp output format

Browse files
Files changed (2) hide show
  1. app/main.py +110 -47
  2. app/retriever.py +0 -1
app/main.py CHANGED
@@ -11,9 +11,49 @@ except Exception as e:
11
  raise
12
 
13
  # ---------------------------------------------------------------------
14
- # Gradio Interface with MCP support
15
  # ---------------------------------------------------------------------
16
- def retriever_interface(query, reports_filter="", sources_filter="", subtype_filter="", year_filter=""):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  """
18
  Wrapper function for gradio interface to handle optional filter parameters
19
  """
@@ -35,59 +75,82 @@ def retriever_interface(query, reports_filter="", sources_filter="", subtype_fil
35
  # Format results for display
36
  formatted_results = []
37
  for i, doc in enumerate(results, 1):
38
- metadata_str = ", ".join([f"{k}: {v}" for k, v in doc.get("metadata", {}).items()])
39
- formatted_results.append(f"=== Result {i} ===\nContent: {doc['page_content']}\nMetadata: {metadata_str}\n")
 
 
 
 
 
 
 
 
 
40
 
41
  return "\n".join(formatted_results)
42
 
43
- ui = gr.Interface(
44
- fn=retriever_interface,
45
- inputs=[
46
- gr.Textbox(
47
- label="Query",
48
- lines=2,
49
- placeholder="Enter your search query here",
50
- info="The query to search for in the vector database"
51
- ),
52
- gr.Textbox(
53
- label="Reports Filter (optional)",
54
- lines=1,
55
- placeholder="report1.pdf, report2.pdf",
56
- info="Comma-separated list of specific report filenames to search within (leave empty for all)"
57
- ),
58
- gr.Textbox(
59
- label="Sources Filter (optional)",
60
- lines=1,
61
- placeholder="annual_report",
62
- info="Filter by document source type (leave empty for all)"
63
- ),
64
- gr.Textbox(
65
- label="Subtype Filter (optional)",
66
- lines=1,
67
- placeholder="financial",
68
- info="Filter by document subtype (leave empty for all)"
69
- ),
70
- gr.Textbox(
71
- label="Year Filter (optional)",
72
- lines=1,
73
- placeholder="2023, 2024",
74
- info="Comma-separated list of years to filter by (leave empty for all)"
75
- ),
76
- ],
77
- outputs=gr.Textbox(
78
- label="Retrieved Context",
79
- lines=10,
80
- show_copy_button=True
81
- ),
82
- title="RAG Retrieval Service UI",
83
- description="Retrieves semantically similar documents from vector database. Intended for use in RAG pipelines as an MCP server.",
84
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  # Launch with MCP server enabled
87
  if __name__ == "__main__":
88
  ui.launch(
89
  server_name="0.0.0.0",
90
- server_port=7860, # Different port from reranker
91
  mcp_server=True,
92
  show_error=True
93
  )
 
11
  raise
12
 
13
  # ---------------------------------------------------------------------
14
+ # MCP - returns raw dictionary format
15
  # ---------------------------------------------------------------------
16
+ def retrieve_mcp(
17
+ query: str,
18
+ reports_filter: str = "",
19
+ sources_filter: str = "",
20
+ subtype_filter: str = "",
21
+ year_filter: str = ""
22
+ ) -> list:
23
+ """
24
+ Retrieve semantically similar documents from the vector database for MCP clients.
25
+
26
+ Args:
27
+ query (str): The search query text
28
+ reports_filter (str): Comma-separated list of specific report filenames (optional)
29
+ sources_filter (str): Filter by document source type (optional)
30
+ subtype_filter (str): Filter by document subtype (optional)
31
+ year_filter (str): Comma-separated list of years to filter by (optional)
32
+
33
+ Returns:
34
+ list: List of dictionaries containing document content, metadata, and scores
35
+ """
36
+ # Parse filter inputs (convert empty strings to None or lists)
37
+ reports = [r.strip() for r in reports_filter.split(",") if r.strip()] if reports_filter else []
38
+ sources = sources_filter.strip() if sources_filter else None
39
+ subtype = subtype_filter.strip() if subtype_filter else None
40
+ year = [y.strip() for y in year_filter.split(",") if y.strip()] if year_filter else None
41
+
42
+ # Call retriever function and return raw results
43
+ results = retrieve_context(
44
+ query=query,
45
+ reports=reports,
46
+ sources=sources,
47
+ subtype=subtype,
48
+ year=year
49
+ )
50
+
51
+ return results
52
+
53
+ # ---------------------------------------------------------------------
54
+ # UI - returns formatted string
55
+ # ---------------------------------------------------------------------
56
+ def retrieve_ui(query, reports_filter="", sources_filter="", subtype_filter="", year_filter=""):
57
  """
58
  Wrapper function for gradio interface to handle optional filter parameters
59
  """
 
75
  # Format results for display
76
  formatted_results = []
77
  for i, doc in enumerate(results, 1):
78
+ # Extract content and metadata using the correct keys from HF Spaces API
79
+ content = doc.get('answer', '')
80
+ metadata = doc.get('answer_metadata', {})
81
+ score = doc.get('score', 'N/A')
82
+
83
+ metadata_str = ", ".join([f"{k}: {v}" for k, v in metadata.items()])
84
+ formatted_results.append(
85
+ f"=== Result {i} (Score: {score}) ===\n"
86
+ f"Content: {content}\n"
87
+ f"Metadata: {metadata_str}\n"
88
+ )
89
 
90
  return "\n".join(formatted_results)
91
 
92
+ # Create the Gradio interface with Blocks to support both UI and MCP
93
+ with gr.Blocks() as ui:
94
+ gr.Markdown("# RAG Retrieval Service UI")
95
+ gr.Markdown("Retrieves semantically similar documents from vector database. Intended for use in RAG pipelines as an MCP server.")
96
+
97
+ with gr.Row():
98
+ with gr.Column():
99
+ query_input = gr.Textbox(
100
+ label="Query",
101
+ lines=2,
102
+ placeholder="Enter your search query here",
103
+ info="The query to search for in the vector database"
104
+ )
105
+ reports_input = gr.Textbox(
106
+ label="Reports Filter (optional)",
107
+ lines=1,
108
+ placeholder="report1.pdf, report2.pdf",
109
+ info="Comma-separated list of specific report filenames to search within (leave empty for all)"
110
+ )
111
+ sources_input = gr.Textbox(
112
+ label="Sources Filter (optional)",
113
+ lines=1,
114
+ placeholder="annual_report",
115
+ info="Filter by document source type (leave empty for all)"
116
+ )
117
+ subtype_input = gr.Textbox(
118
+ label="Subtype Filter (optional)",
119
+ lines=1,
120
+ placeholder="financial",
121
+ info="Filter by document subtype (leave empty for all)"
122
+ )
123
+ year_input = gr.Textbox(
124
+ label="Year Filter (optional)",
125
+ lines=1,
126
+ placeholder="2023, 2024",
127
+ info="Comma-separated list of years to filter by (leave empty for all)"
128
+ )
129
+
130
+ submit_btn = gr.Button("Submit", variant="primary")
131
+
132
+ with gr.Column():
133
+ output = gr.Textbox(
134
+ label="Retrieved Context",
135
+ lines=10,
136
+ show_copy_button=True
137
+ )
138
+
139
+ # UI event handler
140
+ submit_btn.click(
141
+ fn=retrieve_ui,
142
+ inputs=[query_input, reports_input, sources_input, subtype_input, year_input],
143
+ outputs=output
144
+ )
145
+
146
+ # MCP endpoint
147
+ gr.api(retrieve_mcp)
148
 
149
  # Launch with MCP server enabled
150
  if __name__ == "__main__":
151
  ui.launch(
152
  server_name="0.0.0.0",
153
+ server_port=7860,
154
  mcp_server=True,
155
  show_error=True
156
  )
app/retriever.py CHANGED
@@ -91,7 +91,6 @@ def create_filter(
91
 
92
  def retrieve_context(
93
  query: str,
94
- vectorstore,
95
  reports: List[str] = None,
96
  sources: str = None,
97
  subtype: str = None,
 
91
 
92
  def retrieve_context(
93
  query: str,
 
94
  reports: List[str] = None,
95
  sources: str = None,
96
  subtype: str = None,