Add proper planning with mcp support

Files changed:
- app.py +29 -6
- prompts/planning.py +14 -3
- requirements.txt +2 -0
- utils/huggingface_mcp_llamaindex.py +17 -0
app.py
CHANGED
@@ -6,7 +6,8 @@ import os
 from utils.google_genai_llm import get_response, generate_with_gemini
 from utils.utils import parse_json_codefences
 from prompts.requirements_gathering import requirements_gathering_system_prompt
-from prompts.planning import hf_query_gen_prompt
+from prompts.planning import hf_query_gen_prompt, hf_context_gen_prompt
+from utils.huggingface_mcp_llamaindex import connect_and_get_tools, call_tool
 from prompts.devstral_coding_prompt import devstral_code_gen_sys_prompt, devstral_code_gen_user_prompt
 from dotenv import load_dotenv
 import os
@@ -231,7 +232,7 @@ def upload_file_handler(files):
         return files
     return []
 
-def generate_plan(history, file_cache):
+async def generate_plan(history, file_cache):
     """Generate a plan using the planning prompt and Gemini API"""
     
     # Build conversation history
@@ -241,17 +242,39 @@ def generate_plan(history, file_cache):
         conversation_history += f"User: {user_msg}\n"
         if ai_msg:
             conversation_history += f"Assistant: {ai_msg}\n"
-    
+    hf_query_gen_tool_details = await connect_and_get_tools()
     # Format the prompt
-    formatted_prompt = hf_query_gen_prompt
-    
+    formatted_prompt = hf_query_gen_prompt.format(
+        Tool_Details=hf_query_gen_tool_details
+    ) + "\n\n" + conversation_history
     # Get plan from Gemini
     plan = generate_with_gemini(formatted_prompt, "Planning with gemini")
     
     # Parse the plan
     parsed_plan = parse_json_codefences(plan)
+    # Call tool to get tool calls
+    try:
+        tool_calls = await asyncio.gather(*[call_tool(step['tool'], step['args']) for step in parsed_plan])
+    except Exception as e:
+        tool_calls = []
 
-    
+    if tool_calls!=[]:
+        formatted_context_prompt = hf_context_gen_prompt.format(
+            Conversation=conversation_history,
+            Tool_Calls=parsed_plan,
+            Results=tool_calls
+        )
+        context = generate_with_gemini(formatted_context_prompt, "Generating context for plan")
+
+    else:
+        formatted_context_prompt = hf_context_gen_prompt.format(
+            Conversation=conversation_history,
+            Tool_Calls=parsed_plan,
+            Results="Couldn't generate the tool calls results but use your knowledge about huggingface platform(models, datasets, spaces, training libraries, transfomers library etc.) as backup to generate the plan"
+        )
+        context = generate_with_gemini(formatted_context_prompt, "Generating context for plan")
+
+    return context
 
 def generate_code_with_devstral(plan_text, history, file_cache):
     """Generate code using the deployed Devstral model via Modal"""
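The core of the new `generate_plan` flow is a fan-out over the parsed plan: each `{"tool", "args"}` step is dispatched through `call_tool` concurrently with `asyncio.gather`, and any failure collapses to an empty result list so the context prompt can still be built from model knowledge alone. A minimal, self-contained sketch of that pattern follows; `fake_call_tool` and `run_planned_calls` are illustrative stand-ins, not names from this PR.

```python
import asyncio

async def fake_call_tool(tool_name: str, tool_args: dict) -> dict:
    # Stand-in for utils.huggingface_mcp_llamaindex.call_tool: pretend the
    # MCP round-trip takes a moment and echo the inputs back as a result.
    await asyncio.sleep(0.1)
    return {"tool": tool_name, "args": tool_args, "result": "ok"}

async def run_planned_calls(parsed_plan: list[dict]) -> list[dict]:
    # Fan the planned tool calls out concurrently; on any failure fall back
    # to an empty list, mirroring the try/except in generate_plan.
    try:
        return await asyncio.gather(
            *[fake_call_tool(step["tool"], step["args"]) for step in parsed_plan]
        )
    except Exception:
        return []

if __name__ == "__main__":
    plan = [
        {"tool": "model_search", "args": {"query": "text classification", "limit": 3}},
        {"tool": "dataset_search", "args": {"query": "imdb", "limit": 5}},
    ]
    print(asyncio.run(run_planned_calls(plan)))
```

Because `generate_plan` is now a coroutine, the Gradio event handler that invokes it has to await it; Gradio accepts async callbacks directly.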
prompts/planning.py
CHANGED
@@ -16,13 +16,24 @@ Get detailed information about a specific model
 Search for datasets with filters for author, tags, etc…
 * Dataset Details
 Get detailed information about a specific dataset
+
+Complete_Tool_Details: {Tool_Details}
 When the user describes a problem, respond with:
 - A JSON list of tool-calls such as:
 ```json
 [
-  {"tool": "
-  {"tool": "
+  {{"tool": "dataset_search", "args": {{"query": "...", "limit": 5}}}},
+  {{"tool": "model_search", "args": {{"query": "...", "limit": 3}}}}
 ]
 ```
 Just provide the response in the provided json format without any suffix or prefix or any explanation.
-"""
+"""
+
+
+hf_context_gen_prompt="""Given the chat history between a user and chatbot about his data science problem and the requirements and the generated tool calls and planning to solve the problem using huggingface api and the results of the queries, generate a plan with context with all the details of the results of the queries and the problem statements which can be passed to an llm to generate a solution code for the problem using huggingface platform libraries and models, spaces, datasets etc.
+
+Conversation: {Conversation}
+Tool_Calls: {Tool_Calls}
+Results: {Results}
+
+Plan: """
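The doubled braces in the updated examples are what let `hf_query_gen_prompt` pass through `str.format`: only `{Tool_Details}` is substituted, while `{{...}}` renders as literal braces, so the example tool calls reach the model intact. A small sketch of that behaviour, using a trimmed stand-in for the real prompt:

```python
# Trimmed stand-in for hf_query_gen_prompt showing the brace-escaping rule:
# {Tool_Details} is replaced by .format(), while {{...}} collapses to literal
# braces, so the example tool calls survive formatting unchanged.
template = """Complete_Tool_Details: {Tool_Details}
[
  {{"tool": "dataset_search", "args": {{"query": "...", "limit": 5}}}},
  {{"tool": "model_search", "args": {{"query": "...", "limit": 3}}}}
]"""

print(template.format(Tool_Details="<tool list fetched from the MCP server>"))
# Complete_Tool_Details: <tool list fetched from the MCP server>
# [
#   {"tool": "dataset_search", "args": {"query": "...", "limit": 5}},
#   {"tool": "model_search", "args": {"query": "...", "limit": 3}}
# ]
```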
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
 google-genai==1.19.0
+mcp==1.9.1
 gradio[mcp]==5.33.0
 pandas==2.3.0
 python-dotenv==1.0.1
@@ -6,3 +7,4 @@ openpyxl==3.1.5
 Pillow==10.4.0
 marker-pdf==1.7.4
 modal==0.75.6
+llama-index-tools-mcp
utils/huggingface_mcp_llamaindex.py
ADDED
@@ -0,0 +1,17 @@
+from llama_index.tools.mcp import BasicMCPClient
+from dotenv import load_dotenv
+import os
+load_dotenv()
+
+async def connect_and_get_tools():
+    # Connect to an MCP server using different transports
+    http_client = BasicMCPClient("https://huggingface.co/mcp", headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"})
+
+    # List available tools
+    tools = await http_client.list_tools()
+    return tools
+
+async def call_tool(tool_name, tool_args):
+    http_client = BasicMCPClient("https://huggingface.co/mcp", headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"})
+    result = await http_client.call_tool(tool_name, tool_args)
+    return result
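Both helpers are coroutines, so a caller outside an event loop drives them with `asyncio.run`. A rough usage sketch, assuming `HF_TOKEN` is set in `.env` or the environment; the tool name and arguments passed to `call_tool` below are placeholders, not confirmed names from the Hugging Face MCP server:

```python
import asyncio

from utils.huggingface_mcp_llamaindex import connect_and_get_tools, call_tool

async def main():
    # Discover whatever tools the Hugging Face MCP endpoint currently exposes.
    tools = await connect_and_get_tools()
    print(tools)

    # Invoke one tool by name; "model_search" and its args are placeholders,
    # the real names come from the listing printed above.
    result = await call_tool("model_search", {"query": "sentiment analysis", "limit": 3})
    print(result)

if __name__ == "__main__":
    asyncio.run(main())
```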