Spaces:

burtenshaw
/

autotrain-mcp

Running on A10G

App Files Files Community

burtenshaw committed 10 days ago

Commit

8f5cc68

1 Parent(s): d6ee53d

add push functionality and note about duplication

Browse files

Files changed (1) hide show

app.py +132 -14

app.py CHANGED Viewed

@@ -68,9 +68,23 @@ def create_autotrain_params(
     epochs: int,
     batch_size: int,
     learning_rate: float,
     **kwargs,
 ):
     """Create AutoTrain parameter object based on task type"""
     common_params = {
         "model": base_model,
         "project_name": project_name,
@@ -94,6 +108,7 @@ def create_autotrain_params(
         "mixed_precision": "no",
         "save_total_limit": 1,
         "eval_strategy": "epoch",
     }
     if task == "text-classification":
@@ -114,12 +129,15 @@ def create_autotrain_params(
             "llm-reward": "reward",
         }
         return LLMTrainingParams(
-            **{
-                k: v
-                for k, v in common_params.items()
-                if k not in ["early_stopping_patience", "early_stopping_threshold"]
-            },
             text_column=kwargs.get("text_column", "messages"),
             block_size=kwargs.get("block_size", 2048),
             peft=kwargs.get("use_peft", True),
@@ -245,6 +263,8 @@ def start_training_job(
     batch_size: str = "8",
     learning_rate: str = "2e-5",
     backend: str = "local",
 ) -> str:
     """
     Start a new AutoTrain training job.
@@ -260,6 +280,8 @@ def start_training_job(
         batch_size: Training batch size (default: 8)
         learning_rate: Learning rate for training (default: 2e-5)
         backend: Training backend to use (default: local)
     Returns:
         Status message with run ID and details
@@ -269,6 +291,7 @@ def start_training_job(
         epochs_int = int(epochs)
         batch_size_int = int(batch_size)
         learning_rate_float = float(learning_rate)
         # Generate run ID
         run_id = str(uuid.uuid4())
@@ -283,12 +306,16 @@ def start_training_job(
             "status": "pending",
             "created_at": datetime.utcnow().isoformat(),
             "updated_at": datetime.utcnow().isoformat(),
             "config": {
                 "task": task,
                 "epochs": epochs_int,
                 "batch_size": batch_size_int,
                 "learning_rate": learning_rate_float,
                 "backend": backend,
             },
         }
@@ -306,6 +333,8 @@ def start_training_job(
             epochs=epochs_int,
             batch_size=batch_size_int,
             learning_rate=learning_rate_float,
         )
         # Start training in background
@@ -315,7 +344,8 @@ def start_training_job(
         thread.daemon = True
         thread.start()
-        return f"""✅ Training job submitted successfully!
 Run ID: {run_id}
 Project: {project_name}
@@ -327,7 +357,18 @@ Configuration:
 • Epochs: {epochs}
 • Batch Size: {batch_size}
 • Learning Rate: {learning_rate}
-• Backend: {backend}
 🔗 Monitor progress:
 • Gradio UI: http://localhost:7860
@@ -335,6 +376,8 @@ Configuration:
 💡 Use get_training_runs() to check status"""
     except Exception as e:
         return f"❌ Error submitting job: {str(e)}"
@@ -449,6 +492,18 @@ def get_run_details(run_id: str) -> str:
             details_text += f"\n• Learning Rate: {config.get('learning_rate')}"
             details_text += f"\n• Backend: {config.get('backend')}"
         return details_text
     except Exception as e:
@@ -656,6 +711,8 @@ def submit_training_job_ui(
     batch_size,
     learning_rate,
     backend,
 ):
     """Submit training job from web UI"""
     if not all([task, project_name, base_model, dataset_path]):
@@ -670,6 +727,8 @@ def submit_training_job_ui(
         batch_size=str(batch_size),
         learning_rate=str(learning_rate),
         backend=backend,
     )
     return result, fetch_runs_for_ui()
@@ -685,14 +744,42 @@ with gr.Blocks(
     }
     """,
 ) as app:
-    gr.Markdown("""
     # 🚀 AutoTrain Gradio MCP Server
-    **All-in-One Solution:** Web UI + MCP Server + AutoTrain Integration
-    • **Web Interface**: Manage training jobs through this UI
-    • **MCP Server**: AI assistants can use tools at `http://localhost:7860/gradio_api/mcp/sse`
-    • **Direct Integration**: No FastAPI needed - everything runs in Gradio
     """)
     with gr.Tabs():
@@ -716,6 +803,11 @@ with gr.Blocks(
         with gr.Tab("🏃 Start Training"):
             gr.Markdown("## Submit New Training Job")
             with gr.Row():
                 with gr.Column():
                     task_dropdown = gr.Dropdown(
@@ -750,6 +842,13 @@ with gr.Blocks(
                         value="local",
                     )
             submit_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
             submit_output = gr.Textbox(label="Status", interactive=False, lines=10)
@@ -765,13 +864,27 @@ with gr.Blocks(
             ### Available MCP Tools:
-            - `start_training_job` - Submit new training jobs
             - `get_training_runs` - List all runs with status
             - `get_run_details` - Get detailed run information
-            - `delete_training_run` - Delete training runs
             - `get_task_recommendations` - Get training recommendations
             - `get_system_status` - Check system status
             ### Claude Desktop Configuration:
             ```json
@@ -788,6 +901,7 @@ with gr.Blocks(
             Total Runs: {len(load_runs())}
             W&B Project: {WANDB_PROJECT}
             """)
         # MCP Tools Tab
@@ -825,6 +939,8 @@ with gr.Blocks(
                     gr.Textbox(label="batch_size", value="8"),
                     gr.Textbox(label="learning_rate", value="2e-5"),
                     gr.Textbox(label="backend", value="local"),
                 ],
                 outputs=gr.Textbox(label="Training Job Result"),
                 title="start_training_job",
@@ -875,6 +991,8 @@ with gr.Blocks(
             batch_size,
             learning_rate,
             backend,
         ],
         outputs=[submit_output, runs_table],
     )

     epochs: int,
     batch_size: int,
     learning_rate: float,
+    push_to_hub: bool,
+    hub_repo_id: str = "",
     **kwargs,
 ):
     """Create AutoTrain parameter object based on task type"""
+    # Hub configuration
+    hub_config = {}
+    if push_to_hub:
+        hub_config = {
+            "push_to_hub": True,
+            "username": os.environ.get("HF_USERNAME", ""),
+            "token": os.environ.get("HF_TOKEN", ""),
+        }
+        # If custom repo_id is provided, use it; otherwise use project_name
+        if hub_repo_id:
+            hub_config["repo_id"] = hub_repo_id
     common_params = {
         "model": base_model,
         "project_name": project_name,
         "mixed_precision": "no",
         "save_total_limit": 1,
         "eval_strategy": "epoch",
+        **hub_config,  # Add hub configuration
     }
     if task == "text-classification":
             "llm-reward": "reward",
         }
+        # For LLM tasks, exclude some parameters that don't apply
+        llm_params = {
+            k: v
+            for k, v in common_params.items()
+            if k not in ["early_stopping_patience", "early_stopping_threshold"]
+        }
         return LLMTrainingParams(
+            **llm_params,
             text_column=kwargs.get("text_column", "messages"),
             block_size=kwargs.get("block_size", 2048),
             peft=kwargs.get("use_peft", True),
     batch_size: str = "8",
     learning_rate: str = "2e-5",
     backend: str = "local",
+    push_to_hub: str = "false",
+    hub_repo_id: str = "",
 ) -> str:
     """
     Start a new AutoTrain training job.
         batch_size: Training batch size (default: 8)
         learning_rate: Learning rate for training (default: 2e-5)
         backend: Training backend to use (default: local)
+        push_to_hub: Whether to push final model to Hub (true/false)
+        hub_repo_id: Custom repository ID for Hub (optional)
     Returns:
         Status message with run ID and details
         epochs_int = int(epochs)
         batch_size_int = int(batch_size)
         learning_rate_float = float(learning_rate)
+        push_to_hub_bool = push_to_hub.lower() == "true"
         # Generate run ID
         run_id = str(uuid.uuid4())
             "status": "pending",
             "created_at": datetime.utcnow().isoformat(),
             "updated_at": datetime.utcnow().isoformat(),
+            "push_to_hub": push_to_hub_bool,
+            "hub_repo_id": hub_repo_id,
             "config": {
                 "task": task,
                 "epochs": epochs_int,
                 "batch_size": batch_size_int,
                 "learning_rate": learning_rate_float,
                 "backend": backend,
+                "push_to_hub": push_to_hub_bool,
+                "hub_repo_id": hub_repo_id,
             },
         }
             epochs=epochs_int,
             batch_size=batch_size_int,
             learning_rate=learning_rate_float,
+            push_to_hub=push_to_hub_bool,
+            hub_repo_id=hub_repo_id,
         )
         # Start training in background
         thread.daemon = True
         thread.start()
+        # Build result message
+        result_msg = f"""✅ Training job submitted successfully!
 Run ID: {run_id}
 Project: {project_name}
 • Epochs: {epochs}
 • Batch Size: {batch_size}
 • Learning Rate: {learning_rate}
+• Backend: {backend}"""
+        if push_to_hub_bool:
+            final_repo = hub_repo_id if hub_repo_id else project_name
+            result_msg += f"""
+• Push to Hub: ✅ Enabled
+• Repository: {final_repo}
+• Requires: HF_USERNAME and HF_TOKEN environment variables"""
+        else:
+            result_msg += "\n• Push to Hub: ❌ Disabled"
+        result_msg += """
 🔗 Monitor progress:
 • Gradio UI: http://localhost:7860
 💡 Use get_training_runs() to check status"""
+        return result_msg
     except Exception as e:
         return f"❌ Error submitting job: {str(e)}"
             details_text += f"\n• Learning Rate: {config.get('learning_rate')}"
             details_text += f"\n• Backend: {config.get('backend')}"
+            # Hub configuration
+            if config.get("push_to_hub"):
+                details_text += "\n• Push to Hub: ✅ Enabled"
+                if config.get("hub_repo_id"):
+                    details_text += f"\n• Hub Repository: {config.get('hub_repo_id')}"
+                else:
+                    details_text += (
+                        f"\n• Hub Repository: {run['project_name']} (default)"
+                    )
+            else:
+                details_text += "\n• Push to Hub: ❌ Disabled"
         return details_text
     except Exception as e:
     batch_size,
     learning_rate,
     backend,
+    push_to_hub,
+    hub_repo_id,
 ):
     """Submit training job from web UI"""
     if not all([task, project_name, base_model, dataset_path]):
         batch_size=str(batch_size),
         learning_rate=str(learning_rate),
         backend=backend,
+        push_to_hub=str(push_to_hub).lower(),
+        hub_repo_id=hub_repo_id,
     )
     return result, fetch_runs_for_ui()
     }
     """,
 ) as app:
+    gr.Markdown(f"""
     # 🚀 AutoTrain Gradio MCP Server
+    Get your AI models to train your AI models!
+    This space is an MCP server that you can use in Claude Desktop, Cursor, VSCode, etc to train your AI models.
+    :warning: To train models you will need to duplicate this space!
+    **MCP Server**: AI assistants can use tools at http://SPACE_URL/gradio_api/mcp/sse
+    Connect to it like this:
+    ```json
+    {"mcpServers": {"autotrain": {"url": "http://SPACE_URL/gradio_api/mcp/sse",
+                "headers": {"Authorization": "Bearer <YOUR-HUGGING-FACE-TOKEN>"
+                    }
+        }
+      }
+    }
+    ```
+    Or like this for Claude Desktop:
+    ```json
+    {"mcpServers": {"hf-mcp-server": {"command": "npx",
+        "args": [
+            "mcp-remote",
+            "http://SPACE_URL/gradio_api/mcp/sse",
+            "--header",
+            "Authorization: Bearer <YOUR-HUGGING-FACE-TOKEN>"
+        ]
+    }
+    }
+    }
+    ```
     """)
     with gr.Tabs():
         with gr.Tab("🏃 Start Training"):
             gr.Markdown("## Submit New Training Job")
+            gr.Markdown("""
+            💡 **Hub Integration**: Enable "Push to Hub" to automatically upload your trained model to Hugging Face Hub.
+            Requires `HF_USERNAME` and `HF_TOKEN` environment variables.
+            """)
             with gr.Row():
                 with gr.Column():
                     task_dropdown = gr.Dropdown(
                         value="local",
                     )
+            with gr.Row():
+                with gr.Column():
+                    push_to_hub = gr.Checkbox(label="Push to Hub", value=False)
+                    hub_repo_id = gr.Textbox(
+                        label="Hub Repository ID", placeholder="your-repo-id"
+                    )
             submit_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
             submit_output = gr.Textbox(label="Status", interactive=False, lines=10)
             ### Available MCP Tools:
+            - `start_training_job` - Submit new training jobs (includes Hub push)
             - `get_training_runs` - List all runs with status
             - `get_run_details` - Get detailed run information
             - `get_task_recommendations` - Get training recommendations
             - `get_system_status` - Check system status
+            ### 🤗 Hugging Face Hub Integration:
+            To push models to the Hub, set these environment variables:
+            ```bash
+            export HF_USERNAME="your-hf-username"
+            export HF_TOKEN="your-hf-write-token"
+            ```
+            Get your token from: https://huggingface.co/settings/tokens
+            **Usage Examples:**
+            - `push_to_hub="true"` - Push to Hub using project name as repo
+            - `hub_repo_id="my-org/my-model"` - Push to custom repository
             ### Claude Desktop Configuration:
             ```json
             Total Runs: {len(load_runs())}
             W&B Project: {WANDB_PROJECT}
+            Hub Auth: {"✅ Configured" if os.environ.get("HF_TOKEN") else "❌ Missing HF_TOKEN"}
             """)
         # MCP Tools Tab
                     gr.Textbox(label="batch_size", value="8"),
                     gr.Textbox(label="learning_rate", value="2e-5"),
                     gr.Textbox(label="backend", value="local"),
+                    gr.Textbox(label="push_to_hub", value="false"),
+                    gr.Textbox(label="hub_repo_id", placeholder="your-repo-id"),
                 ],
                 outputs=gr.Textbox(label="Training Job Result"),
                 title="start_training_job",
             batch_size,
             learning_rate,
             backend,
+            push_to_hub,
+            hub_repo_id,
         ],
         outputs=[submit_output, runs_table],
     )