Spaces:
Running
Running
fix launch script deploy and refactors
Browse files- config/train_smollm3_h100_lightweight.py +2 -0
- docs/GIT_CONFIGURATION_GUIDE.md +258 -0
- launch.sh +62 -45
- scripts/dataset_tonic/setup_hf_dataset.py +18 -2
- scripts/trackio_tonic/deploy_trackio_space.py +69 -56
- scripts/training/train.py +6 -0
- src/data.py +31 -1
- src/train.py +4 -2
- templates/datasets/readme.md +95 -0
- templates/spaces/README.md +46 -0
- templates/spaces/{requirements_space.txt β requirements.txt} +0 -0
- test_pipeline.py +0 -260
- tests/test_deployment.py +167 -0
- test_formatting_fix.py β tests/test_formatting_fix.py +0 -0
- tests/test_pipeline.py +150 -0
- tests/test_readme_template.py +123 -0
- tests/test_simple_pipeline.py +130 -0
config/train_smollm3_h100_lightweight.py
CHANGED
@@ -56,6 +56,8 @@ config = SmolLM3Config(
|
|
56 |
target_field="completion",
|
57 |
filter_bad_entries=False,
|
58 |
bad_entry_field="bad_entry",
|
|
|
|
|
59 |
|
60 |
# Chat template configuration
|
61 |
use_chat_template=True,
|
|
|
56 |
target_field="completion",
|
57 |
filter_bad_entries=False,
|
58 |
bad_entry_field="bad_entry",
|
59 |
+
sample_size=80000, # 80K samples for lightweight training
|
60 |
+
sample_seed=42, # For reproducibility
|
61 |
|
62 |
# Chat template configuration
|
63 |
use_chat_template=True,
|
docs/GIT_CONFIGURATION_GUIDE.md
ADDED
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Git Configuration Guide for Hugging Face Operations
|
2 |
+
|
3 |
+
This guide explains the correct way to configure git for Hugging Face Spaces deployment and model pushing operations.
|
4 |
+
|
5 |
+
## π― **Overview**
|
6 |
+
|
7 |
+
When working with Hugging Face Spaces and model repositories, proper git configuration is essential for:
|
8 |
+
- Creating and deploying Spaces
|
9 |
+
- Pushing models to the Hub
|
10 |
+
- Managing experiment tracking datasets
|
11 |
+
- Ensuring proper authentication
|
12 |
+
- **Using the user's actual email address for proper git identity and commit attribution**
|
13 |
+
|
14 |
+
## β
**Correct Git Configuration**
|
15 |
+
|
16 |
+
### **1. Local vs Global Configuration**
|
17 |
+
|
18 |
+
**β Wrong (Current):**
|
19 |
+
```bash
|
20 |
+
git config --global user.email "[email protected]"
|
21 |
+
git config --global user.name "$HF_USERNAME"
|
22 |
+
```
|
23 |
+
|
24 |
+
**β
Correct (Updated):**
|
25 |
+
```bash
|
26 |
+
# Get user's actual email address
|
27 |
+
read -p "Enter your email address for git configuration: " GIT_EMAIL
|
28 |
+
|
29 |
+
# Configure git locally for this project only
|
30 |
+
git config user.email "$GIT_EMAIL"
|
31 |
+
git config user.name "$HF_USERNAME"
|
32 |
+
|
33 |
+
# Verify configuration
|
34 |
+
git config user.email
|
35 |
+
git config user.name
|
36 |
+
```
|
37 |
+
|
38 |
+
### **2. Proper Authentication Setup**
|
39 |
+
|
40 |
+
**β
Correct Authentication:**
|
41 |
+
```bash
|
42 |
+
# Login with token and add to git credentials
|
43 |
+
huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential
|
44 |
+
|
45 |
+
# Verify login
|
46 |
+
huggingface-cli whoami
|
47 |
+
```
|
48 |
+
|
49 |
+
### **3. Error Handling**
|
50 |
+
|
51 |
+
**β
Robust Configuration:**
|
52 |
+
```bash
|
53 |
+
# Get user's email and configure git with error handling
|
54 |
+
read -p "Enter your email address for git configuration: " GIT_EMAIL
|
55 |
+
|
56 |
+
if git config user.email "$GIT_EMAIL" && \
|
57 |
+
git config user.name "$HF_USERNAME"; then
|
58 |
+
echo "β
Git configured successfully"
|
59 |
+
echo " Email: $(git config user.email)"
|
60 |
+
echo " Name: $(git config user.name)"
|
61 |
+
else
|
62 |
+
echo "β Failed to configure git"
|
63 |
+
exit 1
|
64 |
+
fi
|
65 |
+
```
|
66 |
+
|
67 |
+
## π§ **Why These Changes Matter**
|
68 |
+
|
69 |
+
### **1. Local Configuration Benefits**
|
70 |
+
- **Isolation**: Doesn't affect other projects on the system
|
71 |
+
- **Project-specific**: Each project can have different git settings
|
72 |
+
- **Cleaner**: No global state pollution
|
73 |
+
- **Safer**: Won't interfere with existing git configurations
|
74 |
+
|
75 |
+
### **2. User's Actual Email Address**
|
76 |
+
- **Professional**: Uses the user's real email address
|
77 |
+
- **Authentic**: Represents the actual user's identity
|
78 |
+
- **Consistent**: Matches the user's Hugging Face account
|
79 |
+
- **Best Practice**: Follows git configuration standards
|
80 |
+
|
81 |
+
### **3. Token-based Authentication**
|
82 |
+
- **Secure**: Uses HF token instead of username/password
|
83 |
+
- **Automated**: No manual password entry required
|
84 |
+
- **Persistent**: Credentials stored securely
|
85 |
+
- **Verified**: Includes verification steps
|
86 |
+
|
87 |
+
## π **Implementation in Launch Script**
|
88 |
+
|
89 |
+
### **Updated Authentication Step:**
|
90 |
+
```bash
|
91 |
+
# Step 8: Authentication setup
|
92 |
+
print_step "Step 8: Authentication Setup"
|
93 |
+
echo "================================"
|
94 |
+
|
95 |
+
export HF_TOKEN="$HF_TOKEN"
|
96 |
+
export TRACKIO_DATASET_REPO="$TRACKIO_DATASET_REPO"
|
97 |
+
|
98 |
+
# Login to Hugging Face with token
|
99 |
+
print_info "Logging in to Hugging Face..."
|
100 |
+
if huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential; then
|
101 |
+
print_status "Successfully logged in to Hugging Face"
|
102 |
+
print_info "Username: $(huggingface-cli whoami)"
|
103 |
+
else
|
104 |
+
print_error "Failed to login to Hugging Face"
|
105 |
+
print_error "Please check your token and try again"
|
106 |
+
exit 1
|
107 |
+
fi
|
108 |
+
|
109 |
+
# Configure git for HF operations
|
110 |
+
print_step "Step 8.1: Git Configuration"
|
111 |
+
echo "================================"
|
112 |
+
|
113 |
+
print_info "Configuring git for Hugging Face operations..."
|
114 |
+
|
115 |
+
# Get user's email for git configuration
|
116 |
+
get_input "Enter your email address for git configuration" "" GIT_EMAIL
|
117 |
+
|
118 |
+
# Configure git locally (not globally) for this project
|
119 |
+
git config user.email "$GIT_EMAIL"
|
120 |
+
git config user.name "$HF_USERNAME"
|
121 |
+
|
122 |
+
# Verify git configuration
|
123 |
+
print_info "Verifying git configuration..."
|
124 |
+
if git config user.email && git config user.name; then
|
125 |
+
print_status "Git configured successfully"
|
126 |
+
print_info " Email: $(git config user.email)"
|
127 |
+
print_info " Name: $(git config user.name)"
|
128 |
+
else
|
129 |
+
print_error "Failed to configure git"
|
130 |
+
exit 1
|
131 |
+
fi
|
132 |
+
```
|
133 |
+
|
134 |
+
## π **Deployment Script Improvements**
|
135 |
+
|
136 |
+
### **Robust File Upload:**
|
137 |
+
```python
|
138 |
+
def upload_files(self) -> bool:
|
139 |
+
"""Upload necessary files to the Space"""
|
140 |
+
try:
|
141 |
+
print("Uploading files to Space...")
|
142 |
+
|
143 |
+
# Files to upload
|
144 |
+
files_to_upload = [
|
145 |
+
"app.py",
|
146 |
+
"requirements_space.txt",
|
147 |
+
"README.md"
|
148 |
+
]
|
149 |
+
|
150 |
+
# Check if we're in a git repository
|
151 |
+
try:
|
152 |
+
subprocess.run(["git", "status"], capture_output=True, check=True)
|
153 |
+
except subprocess.CalledProcessError:
|
154 |
+
print("β οΈ Not in a git repository, initializing...")
|
155 |
+
subprocess.run(["git", "init"], check=True)
|
156 |
+
subprocess.run(["git", "remote", "add", "origin", f"https://huggingface.co/spaces/{self.username}/{self.space_name}"], check=True)
|
157 |
+
|
158 |
+
# Add all files at once
|
159 |
+
existing_files = [f for f in files_to_upload if os.path.exists(f)]
|
160 |
+
if existing_files:
|
161 |
+
subprocess.run(["git", "add"] + existing_files, check=True)
|
162 |
+
subprocess.run(["git", "commit", "-m", "Initial Space setup"], check=True)
|
163 |
+
|
164 |
+
# Push to the space
|
165 |
+
try:
|
166 |
+
subprocess.run(["git", "push", "origin", "main"], check=True)
|
167 |
+
print(f"β
Uploaded {len(existing_files)} files")
|
168 |
+
except subprocess.CalledProcessError:
|
169 |
+
# Try pushing to master branch if main doesn't exist
|
170 |
+
subprocess.run(["git", "push", "origin", "master"], check=True)
|
171 |
+
print(f"β
Uploaded {len(existing_files)} files")
|
172 |
+
else:
|
173 |
+
print("β οΈ No files found to upload")
|
174 |
+
|
175 |
+
return True
|
176 |
+
|
177 |
+
except Exception as e:
|
178 |
+
print(f"β Error uploading files: {e}")
|
179 |
+
return False
|
180 |
+
```
|
181 |
+
|
182 |
+
## π **Troubleshooting**
|
183 |
+
|
184 |
+
### **Common Issues and Solutions:**
|
185 |
+
|
186 |
+
#### **1. Git Configuration Fails**
|
187 |
+
```bash
|
188 |
+
# Check current git config
|
189 |
+
git config --list
|
190 |
+
|
191 |
+
# Reset if needed
|
192 |
+
git config --unset user.email
|
193 |
+
git config --unset user.name
|
194 |
+
|
195 |
+
# Reconfigure
|
196 |
+
git config user.email "[email protected]"
|
197 |
+
git config user.name "your-username"
|
198 |
+
```
|
199 |
+
|
200 |
+
#### **2. Authentication Issues**
|
201 |
+
```bash
|
202 |
+
# Check HF login status
|
203 |
+
huggingface-cli whoami
|
204 |
+
|
205 |
+
# Re-login if needed
|
206 |
+
huggingface-cli logout
|
207 |
+
huggingface-cli login --token "your-token"
|
208 |
+
```
|
209 |
+
|
210 |
+
#### **3. Space Deployment Fails**
|
211 |
+
```bash
|
212 |
+
# Check git remote
|
213 |
+
git remote -v
|
214 |
+
|
215 |
+
# Re-add remote if needed
|
216 |
+
git remote remove origin
|
217 |
+
git remote add origin https://huggingface.co/spaces/username/space-name
|
218 |
+
```
|
219 |
+
|
220 |
+
## π **Best Practices**
|
221 |
+
|
222 |
+
### **1. Always Use Local Configuration**
|
223 |
+
- Use `git config` without `--global` flag
|
224 |
+
- Keeps project configurations isolated
|
225 |
+
- Prevents conflicts with other projects
|
226 |
+
|
227 |
+
### **2. Verify Configuration**
|
228 |
+
- Always check that git config was successful
|
229 |
+
- Display configured values for verification
|
230 |
+
- Exit on failure to prevent downstream issues
|
231 |
+
|
232 |
+
### **3. Use Token-based Authentication**
|
233 |
+
- More secure than username/password
|
234 |
+
- Automatically handles credential storage
|
235 |
+
- Works well with CI/CD systems
|
236 |
+
|
237 |
+
### **4. Handle Errors Gracefully**
|
238 |
+
- Check return codes from git commands
|
239 |
+
- Provide clear error messages
|
240 |
+
- Exit early on critical failures
|
241 |
+
|
242 |
+
### **5. Test Configuration**
|
243 |
+
- Verify git config after setting it
|
244 |
+
- Test HF login before proceeding
|
245 |
+
- Validate remote repository access
|
246 |
+
|
247 |
+
## π― **Summary**
|
248 |
+
|
249 |
+
The updated git configuration approach provides:
|
250 |
+
|
251 |
+
1. **β
Better Isolation**: Local configuration doesn't affect system-wide settings
|
252 |
+
2. **β
User's Actual Email**: Uses the user's real email address for proper git identity
|
253 |
+
3. **β
Proper Authentication**: Token-based login with credential storage
|
254 |
+
4. **β
Error Handling**: Robust verification and error reporting
|
255 |
+
5. **β
Professional Setup**: Uses user's actual email and verification
|
256 |
+
6. **β
Deployment Reliability**: Improved Space deployment with git repository handling
|
257 |
+
|
258 |
+
This ensures a more reliable and professional setup for Hugging Face operations in the SmolLM3 fine-tuning pipeline.
|
launch.sh
CHANGED
@@ -448,7 +448,41 @@ echo "================================"
|
|
448 |
|
449 |
export HF_TOKEN="$HF_TOKEN"
|
450 |
export TRACKIO_DATASET_REPO="$TRACKIO_DATASET_REPO"
|
451 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
452 |
|
453 |
# Step 9: Deploy Trackio Space
|
454 |
print_step "Step 9: Deploying Trackio Space"
|
@@ -482,14 +516,14 @@ echo "================================="
|
|
482 |
cd ../trackio_tonic
|
483 |
python configure_trackio.py
|
484 |
|
485 |
-
# Step 12:
|
486 |
-
print_step "Step 12:
|
487 |
-
echo "
|
488 |
|
489 |
cd ../..
|
490 |
-
|
491 |
|
492 |
-
# Step 13: Dataset
|
493 |
print_step "Step 13: Dataset Configuration"
|
494 |
echo "=================================="
|
495 |
|
@@ -499,57 +533,40 @@ if [ "$TRAINING_CONFIG_TYPE" = "H100 Lightweight (Rapid)" ]; then
|
|
499 |
print_info "Sample size: ${DATASET_SAMPLE_SIZE:-80000} (will be handled by data.py)"
|
500 |
fi
|
501 |
|
502 |
-
# Step 14:
|
503 |
-
print_step "Step 14:
|
504 |
-
echo "
|
505 |
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
echo " Sequence length: $MAX_SEQ_LENGTH"
|
512 |
-
echo " Training steps will be calculated by the training script"
|
513 |
|
514 |
# Step 15: Start training
|
515 |
print_step "Step 15: Starting Training"
|
516 |
echo "=============================="
|
517 |
|
518 |
-
print_info "
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
--
|
529 |
-
--out_dir /output-checkpoint \
|
530 |
-
--init_from scratch \
|
531 |
-
--batch_size $BATCH_SIZE \
|
532 |
-
--learning_rate $LEARNING_RATE \
|
533 |
-
--gradient_accumulation_steps $GRADIENT_ACCUMULATION_STEPS \
|
534 |
-
--max_seq_length $MAX_SEQ_LENGTH \
|
535 |
-
--save_steps $SAVE_STEPS \
|
536 |
-
--eval_steps $EVAL_STEPS \
|
537 |
-
--logging_steps $LOGGING_STEPS \
|
538 |
-
--enable_tracking \
|
539 |
-
--trackio_url "$TRACKIO_URL" \
|
540 |
-
--experiment_name "$EXPERIMENT_NAME" \
|
541 |
-
--hf_token "$HF_TOKEN" \
|
542 |
-
--dataset_repo "$TRACKIO_DATASET_REPO"
|
543 |
|
544 |
# Step 16: Push model to Hugging Face Hub
|
545 |
print_step "Step 16: Pushing Model to HF Hub"
|
546 |
echo "====================================="
|
547 |
|
548 |
-
print_info "
|
549 |
-
|
550 |
-
echo " Repository: $REPO_NAME"
|
551 |
|
552 |
-
# Run the
|
553 |
python scripts/model_tonic/push_to_huggingface.py /output-checkpoint "$REPO_NAME" \
|
554 |
--token "$HF_TOKEN" \
|
555 |
--trackio-url "$TRACKIO_URL" \
|
|
|
448 |
|
449 |
export HF_TOKEN="$HF_TOKEN"
|
450 |
export TRACKIO_DATASET_REPO="$TRACKIO_DATASET_REPO"
|
451 |
+
|
452 |
+
# Login to Hugging Face with token
|
453 |
+
print_info "Logging in to Hugging Face..."
|
454 |
+
if huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential; then
|
455 |
+
print_status "Successfully logged in to Hugging Face"
|
456 |
+
print_info "Username: $(huggingface-cli whoami)"
|
457 |
+
else
|
458 |
+
print_error "Failed to login to Hugging Face"
|
459 |
+
print_error "Please check your token and try again"
|
460 |
+
exit 1
|
461 |
+
fi
|
462 |
+
|
463 |
+
# Configure git for HF operations
|
464 |
+
print_step "Step 8.1: Git Configuration"
|
465 |
+
echo "================================"
|
466 |
+
|
467 |
+
print_info "Configuring git for Hugging Face operations..."
|
468 |
+
|
469 |
+
# Get user's email for git configuration
|
470 |
+
get_input "Enter the email you used to register your account at huggingface for git configuration" "" GIT_EMAIL
|
471 |
+
|
472 |
+
# Configure git locally (not globally) for this project
|
473 |
+
git config user.email "$GIT_EMAIL"
|
474 |
+
git config user.name "$HF_USERNAME"
|
475 |
+
|
476 |
+
# Verify git configuration
|
477 |
+
print_info "Verifying git configuration..."
|
478 |
+
if git config user.email && git config user.name; then
|
479 |
+
print_status "Git configured successfully"
|
480 |
+
print_info " Email: $(git config user.email)"
|
481 |
+
print_info " Name: $(git config user.name)"
|
482 |
+
else
|
483 |
+
print_error "Failed to configure git"
|
484 |
+
exit 1
|
485 |
+
fi
|
486 |
|
487 |
# Step 9: Deploy Trackio Space
|
488 |
print_step "Step 9: Deploying Trackio Space"
|
|
|
516 |
cd ../trackio_tonic
|
517 |
python configure_trackio.py
|
518 |
|
519 |
+
# Step 12: Training Configuration
|
520 |
+
print_step "Step 12: Training Configuration"
|
521 |
+
echo "==================================="
|
522 |
|
523 |
cd ../..
|
524 |
+
print_info "Using existing configuration file: $CONFIG_FILE"
|
525 |
|
526 |
+
# Step 13: Dataset Configuration
|
527 |
print_step "Step 13: Dataset Configuration"
|
528 |
echo "=================================="
|
529 |
|
|
|
533 |
print_info "Sample size: ${DATASET_SAMPLE_SIZE:-80000} (will be handled by data.py)"
|
534 |
fi
|
535 |
|
536 |
+
# Step 14: Training Parameters
|
537 |
+
print_step "Step 14: Training Parameters"
|
538 |
+
echo "================================"
|
539 |
|
540 |
+
print_info "Training parameters will be loaded from configuration file"
|
541 |
+
print_info "Model: $MODEL_NAME"
|
542 |
+
print_info "Dataset: $DATASET_NAME"
|
543 |
+
print_info "Batch size: $BATCH_SIZE"
|
544 |
+
print_info "Learning rate: $LEARNING_RATE"
|
|
|
|
|
545 |
|
546 |
# Step 15: Start training
|
547 |
print_step "Step 15: Starting Training"
|
548 |
echo "=============================="
|
549 |
|
550 |
+
print_info "Starting training with configuration: $CONFIG_FILE"
|
551 |
+
print_info "Experiment: $EXPERIMENT_NAME"
|
552 |
+
print_info "Output: /output-checkpoint"
|
553 |
+
print_info "Trackio: $TRACKIO_URL"
|
554 |
+
|
555 |
+
# Run the simpler training script
|
556 |
+
python scripts/training/train.py \
|
557 |
+
--config "$CONFIG_FILE" \
|
558 |
+
--experiment-name "$EXPERIMENT_NAME" \
|
559 |
+
--output-dir /output-checkpoint \
|
560 |
+
--trackio-url "$TRACKIO_URL"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
561 |
|
562 |
# Step 16: Push model to Hugging Face Hub
|
563 |
print_step "Step 16: Pushing Model to HF Hub"
|
564 |
echo "====================================="
|
565 |
|
566 |
+
print_info "Pushing model to: $REPO_NAME"
|
567 |
+
print_info "Checkpoint: /output-checkpoint"
|
|
|
568 |
|
569 |
+
# Run the push script
|
570 |
python scripts/model_tonic/push_to_huggingface.py /output-checkpoint "$REPO_NAME" \
|
571 |
--token "$HF_TOKEN" \
|
572 |
--trackio-url "$TRACKIO_URL" \
|
scripts/dataset_tonic/setup_hf_dataset.py
CHANGED
@@ -6,6 +6,7 @@ Setup script for Hugging Face Dataset repository for Trackio experiments
|
|
6 |
import os
|
7 |
import json
|
8 |
from datetime import datetime
|
|
|
9 |
from datasets import Dataset
|
10 |
from huggingface_hub import HfApi
|
11 |
|
@@ -249,16 +250,31 @@ def setup_trackio_dataset():
|
|
249 |
# Create dataset
|
250 |
dataset = Dataset.from_list(initial_experiments)
|
251 |
|
252 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
api = HfApi(token=hf_token)
|
254 |
dataset.push_to_hub(
|
255 |
dataset_repo,
|
256 |
token=hf_token,
|
257 |
-
private=True # Make it private for security
|
|
|
258 |
)
|
259 |
|
260 |
print(f"β
Successfully created dataset: {dataset_repo}")
|
261 |
print(f"π Added {len(initial_experiments)} experiments")
|
|
|
|
|
262 |
print("π Dataset is private (only accessible with your token)")
|
263 |
print("\nπ― Next steps:")
|
264 |
print("1. Set HF_TOKEN in your Hugging Face Space environment")
|
|
|
6 |
import os
|
7 |
import json
|
8 |
from datetime import datetime
|
9 |
+
from pathlib import Path
|
10 |
from datasets import Dataset
|
11 |
from huggingface_hub import HfApi
|
12 |
|
|
|
250 |
# Create dataset
|
251 |
dataset = Dataset.from_list(initial_experiments)
|
252 |
|
253 |
+
# Get the project root directory (2 levels up from this script)
|
254 |
+
project_root = Path(__file__).parent.parent.parent
|
255 |
+
templates_dir = project_root / "templates" / "datasets"
|
256 |
+
readme_path = templates_dir / "readme.md"
|
257 |
+
|
258 |
+
# Read README content if it exists
|
259 |
+
readme_content = None
|
260 |
+
if readme_path.exists():
|
261 |
+
with open(readme_path, 'r', encoding='utf-8') as f:
|
262 |
+
readme_content = f.read()
|
263 |
+
print(f"β
Found README template: {readme_path}")
|
264 |
+
|
265 |
+
# Push to HF Hub with README
|
266 |
api = HfApi(token=hf_token)
|
267 |
dataset.push_to_hub(
|
268 |
dataset_repo,
|
269 |
token=hf_token,
|
270 |
+
private=True, # Make it private for security
|
271 |
+
readme_content=readme_content # Include README if available
|
272 |
)
|
273 |
|
274 |
print(f"β
Successfully created dataset: {dataset_repo}")
|
275 |
print(f"π Added {len(initial_experiments)} experiments")
|
276 |
+
if readme_content:
|
277 |
+
print("π Included README from templates")
|
278 |
print("π Dataset is private (only accessible with your token)")
|
279 |
print("\nπ― Next steps:")
|
280 |
print("1. Set HF_TOKEN in your Hugging Face Space environment")
|
scripts/trackio_tonic/deploy_trackio_space.py
CHANGED
@@ -61,22 +61,55 @@ class TrackioSpaceDeployer:
|
|
61 |
try:
|
62 |
print("Uploading files to Space...")
|
63 |
|
64 |
-
#
|
|
|
|
|
|
|
|
|
65 |
files_to_upload = [
|
66 |
"app.py",
|
67 |
-
"
|
68 |
-
"README.md"
|
69 |
]
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
78 |
else:
|
79 |
-
print(f"β οΈ File not found: {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
return True
|
82 |
|
@@ -89,20 +122,28 @@ class TrackioSpaceDeployer:
|
|
89 |
try:
|
90 |
print("Configuring Space settings...")
|
91 |
|
92 |
-
#
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
title: Trackio Tonic
|
107 |
emoji: π
|
108 |
colorFrom: indigo
|
@@ -119,39 +160,11 @@ short_description: trackio for training monitoring
|
|
119 |
|
120 |
A Gradio interface for experiment tracking and monitoring.
|
121 |
|
122 |
-
## Features
|
123 |
-
|
124 |
-
- Create and manage experiments
|
125 |
-
- Log training metrics and parameters
|
126 |
-
- View experiment details and results
|
127 |
-
- Update experiment status
|
128 |
-
|
129 |
-
## Usage
|
130 |
-
|
131 |
-
1. Create a new experiment using the "Create Experiment" tab
|
132 |
-
2. Log metrics during training using the "Log Metrics" tab
|
133 |
-
3. View experiment details using the "View Experiments" tab
|
134 |
-
4. Update experiment status using the "Update Status" tab
|
135 |
-
|
136 |
-
## Integration
|
137 |
-
|
138 |
-
To connect your training script to this Trackio Space:
|
139 |
-
|
140 |
-
```python
|
141 |
-
from monitoring import SmolLM3Monitor
|
142 |
-
|
143 |
-
monitor = SmolLM3Monitor(
|
144 |
-
experiment_name="my_experiment",
|
145 |
-
trackio_url="{self.space_url}",
|
146 |
-
enable_tracking=True
|
147 |
-
)
|
148 |
-
```
|
149 |
-
|
150 |
Visit: {self.space_url}
|
151 |
"""
|
152 |
-
|
153 |
-
|
154 |
-
f.
|
155 |
|
156 |
return True
|
157 |
|
|
|
61 |
try:
|
62 |
print("Uploading files to Space...")
|
63 |
|
64 |
+
# Get the project root directory (3 levels up from this script)
|
65 |
+
project_root = Path(__file__).parent.parent.parent
|
66 |
+
templates_dir = project_root / "templates" / "spaces"
|
67 |
+
|
68 |
+
# Files to upload from templates/spaces
|
69 |
files_to_upload = [
|
70 |
"app.py",
|
71 |
+
"requirements.txt"
|
|
|
72 |
]
|
73 |
|
74 |
+
# README.md will be created by configure_space method
|
75 |
+
|
76 |
+
# Copy files from templates/spaces to current directory
|
77 |
+
copied_files = []
|
78 |
+
for file_name in files_to_upload:
|
79 |
+
source_path = templates_dir / file_name
|
80 |
+
if source_path.exists():
|
81 |
+
import shutil
|
82 |
+
shutil.copy2(source_path, file_name)
|
83 |
+
copied_files.append(file_name)
|
84 |
+
print(f"β
Copied {file_name} from templates")
|
85 |
else:
|
86 |
+
print(f"β οΈ File not found: {source_path}")
|
87 |
+
|
88 |
+
# Check if we're in a git repository
|
89 |
+
try:
|
90 |
+
subprocess.run(["git", "status"], capture_output=True, check=True)
|
91 |
+
except subprocess.CalledProcessError:
|
92 |
+
print("β οΈ Not in a git repository, initializing...")
|
93 |
+
subprocess.run(["git", "init"], check=True)
|
94 |
+
subprocess.run(["git", "remote", "add", "origin", f"https://huggingface.co/spaces/{self.username}/{self.space_name}"], check=True)
|
95 |
+
|
96 |
+
# Add all files at once
|
97 |
+
existing_files = [f for f in files_to_upload if os.path.exists(f)]
|
98 |
+
if existing_files:
|
99 |
+
subprocess.run(["git", "add"] + existing_files, check=True)
|
100 |
+
subprocess.run(["git", "add", "README.md"], check=True) # Add README.md that was created in configure_space
|
101 |
+
subprocess.run(["git", "commit", "-m", "Initial Space setup"], check=True)
|
102 |
+
|
103 |
+
# Push to the space
|
104 |
+
try:
|
105 |
+
subprocess.run(["git", "push", "origin", "main"], check=True)
|
106 |
+
print(f"β
Uploaded {len(existing_files)} files")
|
107 |
+
except subprocess.CalledProcessError:
|
108 |
+
# Try pushing to master branch if main doesn't exist
|
109 |
+
subprocess.run(["git", "push", "origin", "master"], check=True)
|
110 |
+
print(f"β
Uploaded {len(existing_files)} files")
|
111 |
+
else:
|
112 |
+
print("β οΈ No files found to upload")
|
113 |
|
114 |
return True
|
115 |
|
|
|
122 |
try:
|
123 |
print("Configuring Space settings...")
|
124 |
|
125 |
+
# Get the project root directory (3 levels up from this script)
|
126 |
+
project_root = Path(__file__).parent.parent.parent
|
127 |
+
templates_dir = project_root / "templates" / "spaces"
|
128 |
+
readme_template_path = templates_dir / "README.md"
|
129 |
+
|
130 |
+
# Read README template if it exists
|
131 |
+
if readme_template_path.exists():
|
132 |
+
with open(readme_template_path, 'r', encoding='utf-8') as f:
|
133 |
+
readme_template = f.read()
|
134 |
+
|
135 |
+
# Replace placeholder with actual space URL
|
136 |
+
readme_content = readme_template.replace("{SPACE_URL}", self.space_url)
|
137 |
+
|
138 |
+
# Write README.md for the space
|
139 |
+
with open("README.md", "w", encoding='utf-8') as f:
|
140 |
+
f.write(readme_content)
|
141 |
+
|
142 |
+
print(f"β
Created README.md from template")
|
143 |
+
else:
|
144 |
+
print(f"β οΈ README template not found: {readme_template_path}")
|
145 |
+
# Fallback to basic README
|
146 |
+
basic_readme = f"""---
|
147 |
title: Trackio Tonic
|
148 |
emoji: π
|
149 |
colorFrom: indigo
|
|
|
160 |
|
161 |
A Gradio interface for experiment tracking and monitoring.
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
Visit: {self.space_url}
|
164 |
"""
|
165 |
+
with open("README.md", "w", encoding='utf-8') as f:
|
166 |
+
f.write(basic_readme)
|
167 |
+
print(f"β
Created basic README.md")
|
168 |
|
169 |
return True
|
170 |
|
scripts/training/train.py
CHANGED
@@ -63,11 +63,13 @@ def main():
|
|
63 |
try:
|
64 |
from config.train_smollm3_openhermes_fr_a100_large import get_config as get_large_config
|
65 |
from config.train_smollm3_openhermes_fr_a100_multiple_passes import get_config as get_multiple_passes_config
|
|
|
66 |
|
67 |
# Map config files to their respective functions
|
68 |
config_map = {
|
69 |
"config/train_smollm3_openhermes_fr_a100_large.py": get_large_config,
|
70 |
"config/train_smollm3_openhermes_fr_a100_multiple_passes.py": get_multiple_passes_config,
|
|
|
71 |
}
|
72 |
|
73 |
if args.config in config_map:
|
@@ -81,6 +83,7 @@ def main():
|
|
81 |
print("Available configurations:")
|
82 |
print(" - config/train_smollm3_openhermes_fr_a100_large.py (Large batch, 1.3 passes)")
|
83 |
print(" - config/train_smollm3_openhermes_fr_a100_multiple_passes.py (Multiple passes, 4 epochs)")
|
|
|
84 |
return 1
|
85 |
|
86 |
# Override experiment name if provided
|
@@ -124,6 +127,9 @@ def main():
|
|
124 |
|
125 |
# Import and run training
|
126 |
try:
|
|
|
|
|
|
|
127 |
from train import main as train_main
|
128 |
|
129 |
# Set up training arguments - config is positional, not --config
|
|
|
63 |
try:
|
64 |
from config.train_smollm3_openhermes_fr_a100_large import get_config as get_large_config
|
65 |
from config.train_smollm3_openhermes_fr_a100_multiple_passes import get_config as get_multiple_passes_config
|
66 |
+
from config.train_smollm3_h100_lightweight import config as h100_lightweight_config
|
67 |
|
68 |
# Map config files to their respective functions
|
69 |
config_map = {
|
70 |
"config/train_smollm3_openhermes_fr_a100_large.py": get_large_config,
|
71 |
"config/train_smollm3_openhermes_fr_a100_multiple_passes.py": get_multiple_passes_config,
|
72 |
+
"config/train_smollm3_h100_lightweight.py": lambda x: h100_lightweight_config,
|
73 |
}
|
74 |
|
75 |
if args.config in config_map:
|
|
|
83 |
print("Available configurations:")
|
84 |
print(" - config/train_smollm3_openhermes_fr_a100_large.py (Large batch, 1.3 passes)")
|
85 |
print(" - config/train_smollm3_openhermes_fr_a100_multiple_passes.py (Multiple passes, 4 epochs)")
|
86 |
+
print(" - config/train_smollm3_h100_lightweight.py (H100 lightweight, 80K samples)")
|
87 |
return 1
|
88 |
|
89 |
# Override experiment name if provided
|
|
|
127 |
|
128 |
# Import and run training
|
129 |
try:
|
130 |
+
# Add src directory to path
|
131 |
+
src_path = str(Path(__file__).parent.parent.parent / "src")
|
132 |
+
sys.path.insert(0, src_path)
|
133 |
from train import main as train_main
|
134 |
|
135 |
# Set up training arguments - config is positional, not --config
|
src/data.py
CHANGED
@@ -24,7 +24,9 @@ class SmolLM3Dataset:
|
|
24 |
use_chat_template: bool = True,
|
25 |
chat_template_kwargs: Optional[Dict] = None,
|
26 |
filter_bad_entries: bool = False,
|
27 |
-
bad_entry_field: str = "bad_entry"
|
|
|
|
|
28 |
):
|
29 |
self.data_path = data_path
|
30 |
self.tokenizer = tokenizer
|
@@ -33,6 +35,8 @@ class SmolLM3Dataset:
|
|
33 |
self.chat_template_kwargs = chat_template_kwargs or {}
|
34 |
self.filter_bad_entries = filter_bad_entries
|
35 |
self.bad_entry_field = bad_entry_field
|
|
|
|
|
36 |
|
37 |
# Load and process dataset
|
38 |
self.dataset = self._load_dataset()
|
@@ -89,6 +93,32 @@ class SmolLM3Dataset:
|
|
89 |
filtered_size = len(dataset[split])
|
90 |
logger.info("Filtered %s: %d -> %d samples", split, original_size, filtered_size)
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
# If only 'train' split exists, create validation and test splits
|
93 |
if ("train" in dataset) and ("validation" not in dataset or "test" not in dataset):
|
94 |
logger.info("Automatically splitting train into train/validation/test (98/1/1)")
|
|
|
24 |
use_chat_template: bool = True,
|
25 |
chat_template_kwargs: Optional[Dict] = None,
|
26 |
filter_bad_entries: bool = False,
|
27 |
+
bad_entry_field: str = "bad_entry",
|
28 |
+
sample_size: Optional[int] = None,
|
29 |
+
sample_seed: int = 42
|
30 |
):
|
31 |
self.data_path = data_path
|
32 |
self.tokenizer = tokenizer
|
|
|
35 |
self.chat_template_kwargs = chat_template_kwargs or {}
|
36 |
self.filter_bad_entries = filter_bad_entries
|
37 |
self.bad_entry_field = bad_entry_field
|
38 |
+
self.sample_size = sample_size
|
39 |
+
self.sample_seed = sample_seed
|
40 |
|
41 |
# Load and process dataset
|
42 |
self.dataset = self._load_dataset()
|
|
|
93 |
filtered_size = len(dataset[split])
|
94 |
logger.info("Filtered %s: %d -> %d samples", split, original_size, filtered_size)
|
95 |
|
96 |
+
# Apply sampling if requested
|
97 |
+
if self.sample_size is not None and "train" in dataset:
|
98 |
+
logger.info(f"Sampling {self.sample_size} random samples from {len(dataset['train'])} total samples")
|
99 |
+
import random
|
100 |
+
random.seed(self.sample_seed)
|
101 |
+
|
102 |
+
# Sample indices
|
103 |
+
total_samples = len(dataset["train"])
|
104 |
+
if self.sample_size > total_samples:
|
105 |
+
logger.warning(f"Requested sample size ({self.sample_size}) is larger than dataset size ({total_samples}). Using all samples.")
|
106 |
+
sampled_indices = list(range(total_samples))
|
107 |
+
else:
|
108 |
+
sampled_indices = random.sample(range(total_samples), self.sample_size)
|
109 |
+
|
110 |
+
# Apply sampling to train split
|
111 |
+
dataset["train"] = dataset["train"].select(sampled_indices)
|
112 |
+
logger.info(f"Sampled {len(dataset['train'])} train samples")
|
113 |
+
|
114 |
+
# Also sample validation if it exists and is large
|
115 |
+
if "validation" in dataset and len(dataset["validation"]) > 1000:
|
116 |
+
val_sample_size = min(1000, len(dataset["validation"]))
|
117 |
+
logger.info(f"Sampling {val_sample_size} validation samples from {len(dataset['validation'])} total")
|
118 |
+
val_sampled_indices = random.sample(range(len(dataset["validation"])), val_sample_size)
|
119 |
+
dataset["validation"] = dataset["validation"].select(val_sampled_indices)
|
120 |
+
logger.info(f"Sampled {len(dataset['validation'])} validation samples")
|
121 |
+
|
122 |
# If only 'train' split exists, create validation and test splits
|
123 |
if ("train" in dataset) and ("validation" not in dataset or "test" not in dataset):
|
124 |
logger.info("Automatically splitting train into train/validation/test (98/1/1)")
|
src/train.py
CHANGED
@@ -183,13 +183,15 @@ def main():
|
|
183 |
dataset_path = os.path.join('/input', args.dataset_dir)
|
184 |
logger.info(f"Using local dataset: {dataset_path}")
|
185 |
|
186 |
-
# Load dataset with filtering options
|
187 |
dataset = SmolLM3Dataset(
|
188 |
data_path=dataset_path,
|
189 |
tokenizer=model.tokenizer,
|
190 |
max_seq_length=args.max_seq_length,
|
191 |
filter_bad_entries=getattr(config, 'filter_bad_entries', False),
|
192 |
-
bad_entry_field=getattr(config, 'bad_entry_field', 'bad_entry')
|
|
|
|
|
193 |
)
|
194 |
|
195 |
# Initialize trainer
|
|
|
183 |
dataset_path = os.path.join('/input', args.dataset_dir)
|
184 |
logger.info(f"Using local dataset: {dataset_path}")
|
185 |
|
186 |
+
# Load dataset with filtering options and sampling
|
187 |
dataset = SmolLM3Dataset(
|
188 |
data_path=dataset_path,
|
189 |
tokenizer=model.tokenizer,
|
190 |
max_seq_length=args.max_seq_length,
|
191 |
filter_bad_entries=getattr(config, 'filter_bad_entries', False),
|
192 |
+
bad_entry_field=getattr(config, 'bad_entry_field', 'bad_entry'),
|
193 |
+
sample_size=getattr(config, 'sample_size', None),
|
194 |
+
sample_seed=getattr(config, 'sample_seed', 42)
|
195 |
)
|
196 |
|
197 |
# Initialize trainer
|
templates/datasets/readme.md
CHANGED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
dataset_info:
|
3 |
+
features:
|
4 |
+
- name: experiment_id
|
5 |
+
dtype: string
|
6 |
+
- name: name
|
7 |
+
dtype: string
|
8 |
+
- name: description
|
9 |
+
dtype: string
|
10 |
+
- name: created_at
|
11 |
+
dtype: string
|
12 |
+
- name: status
|
13 |
+
dtype: string
|
14 |
+
- name: metrics
|
15 |
+
dtype: string
|
16 |
+
- name: parameters
|
17 |
+
dtype: string
|
18 |
+
- name: artifacts
|
19 |
+
dtype: string
|
20 |
+
- name: logs
|
21 |
+
dtype: string
|
22 |
+
- name: last_updated
|
23 |
+
dtype: string
|
24 |
+
splits:
|
25 |
+
- name: train
|
26 |
+
num_bytes: 4945
|
27 |
+
num_examples: 2
|
28 |
+
download_size: 15529
|
29 |
+
dataset_size: 4945
|
30 |
+
configs:
|
31 |
+
- config_name: default
|
32 |
+
data_files:
|
33 |
+
- split: train
|
34 |
+
path: data/train-*
|
35 |
+
tags:
|
36 |
+
- trackio
|
37 |
+
- tonic
|
38 |
+
- experiment tracking
|
39 |
+
---
|
40 |
+
|
41 |
+
# Trackio Experiments Dataset
|
42 |
+
|
43 |
+
This dataset stores experiment tracking data for ML training runs, particularly focused on SmolLM3 fine-tuning experiments.
|
44 |
+
|
45 |
+
## Dataset Structure
|
46 |
+
|
47 |
+
The dataset contains the following columns:
|
48 |
+
|
49 |
+
- **experiment_id**: Unique identifier for each experiment
|
50 |
+
- **name**: Human-readable name for the experiment
|
51 |
+
- **description**: Detailed description of the experiment
|
52 |
+
- **created_at**: Timestamp when the experiment was created
|
53 |
+
- **status**: Current status (running, completed, failed, paused)
|
54 |
+
- **metrics**: JSON string containing training metrics over time
|
55 |
+
- **parameters**: JSON string containing experiment configuration
|
56 |
+
- **artifacts**: JSON string containing experiment artifacts
|
57 |
+
- **logs**: JSON string containing experiment logs
|
58 |
+
- **last_updated**: Timestamp of last update
|
59 |
+
|
60 |
+
## Usage
|
61 |
+
|
62 |
+
This dataset is automatically used by the Trackio monitoring system to store and retrieve experiment data. It provides persistent storage for experiment tracking across different training runs.
|
63 |
+
|
64 |
+
## Integration
|
65 |
+
|
66 |
+
The dataset is used by:
|
67 |
+
- Trackio Spaces for experiment visualization
|
68 |
+
- Training scripts for logging metrics and parameters
|
69 |
+
- Monitoring systems for experiment tracking
|
70 |
+
|
71 |
+
## Privacy
|
72 |
+
|
73 |
+
This dataset is private by default to ensure experiment data security. Only users with appropriate permissions can access the data.
|
74 |
+
|
75 |
+
## Examples
|
76 |
+
|
77 |
+
### Sample Experiment Entry
|
78 |
+
```json
|
79 |
+
{
|
80 |
+
"experiment_id": "exp_20250720_130853",
|
81 |
+
"name": "smollm3_finetune",
|
82 |
+
"description": "SmolLM3 fine-tuning experiment",
|
83 |
+
"created_at": "2025-07-20T11:20:01.780908",
|
84 |
+
"status": "running",
|
85 |
+
"metrics": "[{\"timestamp\": \"2025-07-20T11:20:01.780908\", \"step\": 25, \"metrics\": {\"loss\": 1.1659, \"accuracy\": 0.759}}]",
|
86 |
+
"parameters": "{\"model_name\": \"HuggingFaceTB/SmolLM3-3B\", \"batch_size\": 8, \"learning_rate\": 3.5e-06}",
|
87 |
+
"artifacts": "[]",
|
88 |
+
"logs": "[]",
|
89 |
+
"last_updated": "2025-07-20T11:20:01.780908"
|
90 |
+
}
|
91 |
+
```
|
92 |
+
|
93 |
+
## License
|
94 |
+
|
95 |
+
This dataset is part of the Trackio experiment tracking system and follows the same license as the main project.
|
templates/spaces/README.md
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Trackio Tonic
|
3 |
+
emoji: π
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.38.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: true
|
10 |
+
license: mit
|
11 |
+
short_description: trackio for training monitoring
|
12 |
+
---
|
13 |
+
|
14 |
+
# Trackio Experiment Tracking
|
15 |
+
|
16 |
+
A Gradio interface for experiment tracking and monitoring.
|
17 |
+
|
18 |
+
## Features
|
19 |
+
|
20 |
+
- Create and manage experiments
|
21 |
+
- Log training metrics and parameters
|
22 |
+
- View experiment details and results
|
23 |
+
- Update experiment status
|
24 |
+
|
25 |
+
## Usage
|
26 |
+
|
27 |
+
1. Create a new experiment using the "Create Experiment" tab
|
28 |
+
2. Log metrics during training using the "Log Metrics" tab
|
29 |
+
3. View experiment details using the "View Experiments" tab
|
30 |
+
4. Update experiment status using the "Update Status" tab
|
31 |
+
|
32 |
+
## Integration
|
33 |
+
|
34 |
+
To connect your training script to this Trackio Space:
|
35 |
+
|
36 |
+
```python
|
37 |
+
from monitoring import SmolLM3Monitor
|
38 |
+
|
39 |
+
monitor = SmolLM3Monitor(
|
40 |
+
experiment_name="my_experiment",
|
41 |
+
trackio_url="{SPACE_URL}",
|
42 |
+
enable_tracking=True
|
43 |
+
)
|
44 |
+
```
|
45 |
+
|
46 |
+
Visit: {SPACE_URL}
|
templates/spaces/{requirements_space.txt β requirements.txt}
RENAMED
File without changes
|
test_pipeline.py
DELETED
@@ -1,260 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Test script for the SmolLM3 end-to-end pipeline
|
4 |
-
Verifies all components are working correctly
|
5 |
-
"""
|
6 |
-
|
7 |
-
import os
|
8 |
-
import sys
|
9 |
-
import subprocess
|
10 |
-
import importlib
|
11 |
-
from pathlib import Path
|
12 |
-
|
13 |
-
def test_imports():
|
14 |
-
"""Test that all required modules can be imported"""
|
15 |
-
print("π Testing imports...")
|
16 |
-
|
17 |
-
required_modules = [
|
18 |
-
'torch',
|
19 |
-
'transformers',
|
20 |
-
'datasets',
|
21 |
-
'accelerate',
|
22 |
-
'trl',
|
23 |
-
'huggingface_hub',
|
24 |
-
'requests'
|
25 |
-
]
|
26 |
-
|
27 |
-
failed_imports = []
|
28 |
-
for module in required_modules:
|
29 |
-
try:
|
30 |
-
importlib.import_module(module)
|
31 |
-
print(f"β
{module}")
|
32 |
-
except ImportError as e:
|
33 |
-
print(f"β {module}: {e}")
|
34 |
-
failed_imports.append(module)
|
35 |
-
|
36 |
-
if failed_imports:
|
37 |
-
print(f"\nβ Failed imports: {failed_imports}")
|
38 |
-
return False
|
39 |
-
|
40 |
-
print("β
All imports successful")
|
41 |
-
return True
|
42 |
-
|
43 |
-
def test_local_modules():
|
44 |
-
"""Test local module imports"""
|
45 |
-
print("\nπ Testing local modules...")
|
46 |
-
|
47 |
-
# Add src to path
|
48 |
-
sys.path.append('src')
|
49 |
-
|
50 |
-
local_modules = [
|
51 |
-
'config',
|
52 |
-
'model',
|
53 |
-
'data',
|
54 |
-
'trainer',
|
55 |
-
'monitoring'
|
56 |
-
]
|
57 |
-
|
58 |
-
failed_imports = []
|
59 |
-
for module in local_modules:
|
60 |
-
try:
|
61 |
-
importlib.import_module(module)
|
62 |
-
print(f"β
{module}")
|
63 |
-
except ImportError as e:
|
64 |
-
print(f"β {module}: {e}")
|
65 |
-
failed_imports.append(module)
|
66 |
-
|
67 |
-
if failed_imports:
|
68 |
-
print(f"\nβ Failed local imports: {failed_imports}")
|
69 |
-
return False
|
70 |
-
|
71 |
-
print("β
All local modules imported successfully")
|
72 |
-
return True
|
73 |
-
|
74 |
-
def test_scripts():
|
75 |
-
"""Test script availability"""
|
76 |
-
print("\nπ Testing scripts...")
|
77 |
-
|
78 |
-
required_scripts = [
|
79 |
-
'scripts/trackio_tonic/deploy_trackio_space.py',
|
80 |
-
'scripts/trackio_tonic/configure_trackio.py',
|
81 |
-
'scripts/dataset_tonic/setup_hf_dataset.py',
|
82 |
-
'scripts/model_tonic/push_to_huggingface.py',
|
83 |
-
'src/train.py'
|
84 |
-
]
|
85 |
-
|
86 |
-
missing_scripts = []
|
87 |
-
for script in required_scripts:
|
88 |
-
if Path(script).exists():
|
89 |
-
print(f"β
{script}")
|
90 |
-
else:
|
91 |
-
print(f"β {script}")
|
92 |
-
missing_scripts.append(script)
|
93 |
-
|
94 |
-
if missing_scripts:
|
95 |
-
print(f"\nβ Missing scripts: {missing_scripts}")
|
96 |
-
return False
|
97 |
-
|
98 |
-
print("β
All scripts found")
|
99 |
-
return True
|
100 |
-
|
101 |
-
def test_configs():
|
102 |
-
"""Test configuration files"""
|
103 |
-
print("\nπ Testing configurations...")
|
104 |
-
|
105 |
-
config_dir = Path('config')
|
106 |
-
if not config_dir.exists():
|
107 |
-
print("β config directory not found")
|
108 |
-
return False
|
109 |
-
|
110 |
-
config_files = list(config_dir.glob('*.py'))
|
111 |
-
if not config_files:
|
112 |
-
print("β No configuration files found")
|
113 |
-
return False
|
114 |
-
|
115 |
-
print(f"β
Found {len(config_files)} configuration files:")
|
116 |
-
for config in config_files:
|
117 |
-
print(f" - {config.name}")
|
118 |
-
|
119 |
-
return True
|
120 |
-
|
121 |
-
def test_requirements():
|
122 |
-
"""Test requirements files"""
|
123 |
-
print("\nπ Testing requirements...")
|
124 |
-
|
125 |
-
requirements_dir = Path('requirements')
|
126 |
-
if not requirements_dir.exists():
|
127 |
-
print("β requirements directory not found")
|
128 |
-
return False
|
129 |
-
|
130 |
-
req_files = list(requirements_dir.glob('*.txt'))
|
131 |
-
if not req_files:
|
132 |
-
print("β No requirements files found")
|
133 |
-
return False
|
134 |
-
|
135 |
-
print(f"β
Found {len(req_files)} requirements files:")
|
136 |
-
for req in req_files:
|
137 |
-
print(f" - {req.name}")
|
138 |
-
|
139 |
-
return True
|
140 |
-
|
141 |
-
def test_cuda():
|
142 |
-
"""Test CUDA availability"""
|
143 |
-
print("\nπ Testing CUDA...")
|
144 |
-
|
145 |
-
try:
|
146 |
-
import torch
|
147 |
-
if torch.cuda.is_available():
|
148 |
-
device_count = torch.cuda.device_count()
|
149 |
-
device_name = torch.cuda.get_device_name(0)
|
150 |
-
print(f"β
CUDA available: {device_count} device(s)")
|
151 |
-
print(f" - Device 0: {device_name}")
|
152 |
-
else:
|
153 |
-
print("β οΈ CUDA not available (training will be slower)")
|
154 |
-
except Exception as e:
|
155 |
-
print(f"β CUDA test failed: {e}")
|
156 |
-
return False
|
157 |
-
|
158 |
-
return True
|
159 |
-
|
160 |
-
def test_hf_token():
|
161 |
-
"""Test Hugging Face token"""
|
162 |
-
print("\nπ Testing HF token...")
|
163 |
-
|
164 |
-
token = os.environ.get('HF_TOKEN')
|
165 |
-
if not token:
|
166 |
-
print("β οΈ HF_TOKEN not set (will be prompted during setup)")
|
167 |
-
return True
|
168 |
-
|
169 |
-
try:
|
170 |
-
result = subprocess.run(
|
171 |
-
['huggingface-cli', 'whoami'],
|
172 |
-
capture_output=True,
|
173 |
-
text=True,
|
174 |
-
timeout=10
|
175 |
-
)
|
176 |
-
|
177 |
-
if result.returncode == 0:
|
178 |
-
username = result.stdout.strip()
|
179 |
-
print(f"β
HF token valid: {username}")
|
180 |
-
return True
|
181 |
-
else:
|
182 |
-
print(f"β HF token invalid: {result.stderr}")
|
183 |
-
return False
|
184 |
-
except Exception as e:
|
185 |
-
print(f"β HF token test failed: {e}")
|
186 |
-
return False
|
187 |
-
|
188 |
-
def test_pipeline_components():
|
189 |
-
"""Test individual pipeline components"""
|
190 |
-
print("\nπ Testing pipeline components...")
|
191 |
-
|
192 |
-
# Test setup script
|
193 |
-
if Path('setup_launch.py').exists():
|
194 |
-
print("β
setup_launch.py found")
|
195 |
-
else:
|
196 |
-
print("β setup_launch.py not found")
|
197 |
-
return False
|
198 |
-
|
199 |
-
# Test launch script
|
200 |
-
if Path('launch.sh').exists():
|
201 |
-
print("β
launch.sh found")
|
202 |
-
else:
|
203 |
-
print("β launch.sh not found")
|
204 |
-
return False
|
205 |
-
|
206 |
-
# Test README
|
207 |
-
if Path('README_END_TO_END.md').exists():
|
208 |
-
print("β
README_END_TO_END.md found")
|
209 |
-
else:
|
210 |
-
print("β README_END_TO_END.md not found")
|
211 |
-
return False
|
212 |
-
|
213 |
-
return True
|
214 |
-
|
215 |
-
def main():
|
216 |
-
"""Run all tests"""
|
217 |
-
print("π§ͺ SmolLM3 End-to-End Pipeline Test")
|
218 |
-
print("=" * 50)
|
219 |
-
|
220 |
-
tests = [
|
221 |
-
test_imports,
|
222 |
-
test_local_modules,
|
223 |
-
test_scripts,
|
224 |
-
test_configs,
|
225 |
-
test_requirements,
|
226 |
-
test_cuda,
|
227 |
-
test_hf_token,
|
228 |
-
test_pipeline_components
|
229 |
-
]
|
230 |
-
|
231 |
-
passed = 0
|
232 |
-
total = len(tests)
|
233 |
-
|
234 |
-
for test in tests:
|
235 |
-
try:
|
236 |
-
if test():
|
237 |
-
passed += 1
|
238 |
-
except Exception as e:
|
239 |
-
print(f"β Test failed with exception: {e}")
|
240 |
-
|
241 |
-
print(f"\nπ Test Results: {passed}/{total} passed")
|
242 |
-
|
243 |
-
if passed == total:
|
244 |
-
print("π All tests passed! Pipeline is ready to use.")
|
245 |
-
print("\nπ Next steps:")
|
246 |
-
print("1. Run: python setup_launch.py")
|
247 |
-
print("2. Run: chmod +x launch.sh")
|
248 |
-
print("3. Run: ./launch.sh")
|
249 |
-
else:
|
250 |
-
print("β Some tests failed. Please fix the issues before running the pipeline.")
|
251 |
-
print("\nπ§ Common fixes:")
|
252 |
-
print("1. Install missing packages: pip install -r requirements/requirements_core.txt")
|
253 |
-
print("2. Set HF_TOKEN environment variable")
|
254 |
-
print("3. Check CUDA installation")
|
255 |
-
|
256 |
-
return passed == total
|
257 |
-
|
258 |
-
if __name__ == "__main__":
|
259 |
-
success = main()
|
260 |
-
sys.exit(0 if success else 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_deployment.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Test script to verify deployment scripts work correctly
|
4 |
+
"""
|
5 |
+
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
# Add project root to path
|
11 |
+
project_root = Path(__file__).parent
|
12 |
+
sys.path.insert(0, str(project_root))
|
13 |
+
|
14 |
+
def test_templates_exist():
|
15 |
+
"""Test that all required template files exist"""
|
16 |
+
print("π Testing template files...")
|
17 |
+
|
18 |
+
# Check spaces templates
|
19 |
+
spaces_dir = project_root / "templates" / "spaces"
|
20 |
+
spaces_files = ["app.py", "requirements.txt", "README.md"]
|
21 |
+
|
22 |
+
for file_name in spaces_files:
|
23 |
+
file_path = spaces_dir / file_name
|
24 |
+
if file_path.exists():
|
25 |
+
print(f"β
{file_path}")
|
26 |
+
else:
|
27 |
+
print(f"β {file_path} not found")
|
28 |
+
return False
|
29 |
+
|
30 |
+
# Check datasets templates
|
31 |
+
datasets_dir = project_root / "templates" / "datasets"
|
32 |
+
datasets_files = ["readme.md"]
|
33 |
+
|
34 |
+
for file_name in datasets_files:
|
35 |
+
file_path = datasets_dir / file_name
|
36 |
+
if file_path.exists():
|
37 |
+
print(f"β
{file_path}")
|
38 |
+
else:
|
39 |
+
print(f"β {file_path} not found")
|
40 |
+
return False
|
41 |
+
|
42 |
+
return True
|
43 |
+
|
44 |
+
def test_deployment_scripts():
|
45 |
+
"""Test that deployment scripts can import required modules"""
|
46 |
+
print("\nπ Testing deployment scripts...")
|
47 |
+
|
48 |
+
try:
|
49 |
+
# Test space deployment script
|
50 |
+
from scripts.trackio_tonic.deploy_trackio_space import TrackioSpaceDeployer
|
51 |
+
print("β
deploy_trackio_space.py imports successfully")
|
52 |
+
|
53 |
+
# Test dataset setup script
|
54 |
+
from scripts.dataset_tonic.setup_hf_dataset import setup_trackio_dataset
|
55 |
+
print("β
setup_hf_dataset.py imports successfully")
|
56 |
+
|
57 |
+
return True
|
58 |
+
|
59 |
+
except Exception as e:
|
60 |
+
print(f"β Deployment script test failed: {e}")
|
61 |
+
return False
|
62 |
+
|
63 |
+
def test_file_copying():
|
64 |
+
"""Test that file copying logic works"""
|
65 |
+
print("\nπ Testing file copying logic...")
|
66 |
+
|
67 |
+
try:
|
68 |
+
# Test space deployment file copying
|
69 |
+
from scripts.trackio_tonic.deploy_trackio_space import TrackioSpaceDeployer
|
70 |
+
|
71 |
+
# Create a mock deployer
|
72 |
+
deployer = TrackioSpaceDeployer("test-space", "test-user", "test-token")
|
73 |
+
|
74 |
+
# Test that templates directory exists
|
75 |
+
project_root = Path(__file__).parent
|
76 |
+
templates_dir = project_root / "templates" / "spaces"
|
77 |
+
|
78 |
+
if templates_dir.exists():
|
79 |
+
print(f"β
Templates directory exists: {templates_dir}")
|
80 |
+
|
81 |
+
# Check that required files exist
|
82 |
+
for file_name in ["app.py", "requirements.txt", "README.md"]:
|
83 |
+
file_path = templates_dir / file_name
|
84 |
+
if file_path.exists():
|
85 |
+
print(f"β
Template file exists: {file_path}")
|
86 |
+
else:
|
87 |
+
print(f"β Template file missing: {file_path}")
|
88 |
+
return False
|
89 |
+
else:
|
90 |
+
print(f"β Templates directory missing: {templates_dir}")
|
91 |
+
return False
|
92 |
+
|
93 |
+
return True
|
94 |
+
|
95 |
+
except Exception as e:
|
96 |
+
print(f"β File copying test failed: {e}")
|
97 |
+
return False
|
98 |
+
|
99 |
+
def test_readme_inclusion():
|
100 |
+
"""Test that README inclusion logic works"""
|
101 |
+
print("\nπ Testing README inclusion...")
|
102 |
+
|
103 |
+
try:
|
104 |
+
# Test dataset README inclusion
|
105 |
+
from scripts.dataset_tonic.setup_hf_dataset import setup_trackio_dataset
|
106 |
+
|
107 |
+
# Check that README template exists
|
108 |
+
project_root = Path(__file__).parent
|
109 |
+
readme_path = project_root / "templates" / "datasets" / "readme.md"
|
110 |
+
|
111 |
+
if readme_path.exists():
|
112 |
+
print(f"β
README template exists: {readme_path}")
|
113 |
+
|
114 |
+
# Check README content
|
115 |
+
with open(readme_path, 'r', encoding='utf-8') as f:
|
116 |
+
content = f.read()
|
117 |
+
if len(content.strip()) > 0:
|
118 |
+
print(f"β
README has content ({len(content)} characters)")
|
119 |
+
else:
|
120 |
+
print(f"β οΈ README is empty")
|
121 |
+
else:
|
122 |
+
print(f"β README template missing: {readme_path}")
|
123 |
+
return False
|
124 |
+
|
125 |
+
return True
|
126 |
+
|
127 |
+
except Exception as e:
|
128 |
+
print(f"β README inclusion test failed: {e}")
|
129 |
+
return False
|
130 |
+
|
131 |
+
def main():
|
132 |
+
"""Run all tests"""
|
133 |
+
print("π Testing Deployment Scripts")
|
134 |
+
print("=" * 50)
|
135 |
+
|
136 |
+
tests = [
|
137 |
+
test_templates_exist,
|
138 |
+
test_deployment_scripts,
|
139 |
+
test_file_copying,
|
140 |
+
test_readme_inclusion
|
141 |
+
]
|
142 |
+
|
143 |
+
passed = 0
|
144 |
+
total = len(tests)
|
145 |
+
|
146 |
+
for test in tests:
|
147 |
+
if test():
|
148 |
+
passed += 1
|
149 |
+
else:
|
150 |
+
print(f"β Test failed: {test.__name__}")
|
151 |
+
|
152 |
+
print(f"\n{'='*50}")
|
153 |
+
print(f"π Test Results: {passed}/{total} tests passed")
|
154 |
+
|
155 |
+
if passed == total:
|
156 |
+
print("π All tests passed! Deployment scripts are ready to use.")
|
157 |
+
print("\nπ Deployment workflow:")
|
158 |
+
print("1. Space deployment will copy files from templates/spaces/")
|
159 |
+
print("2. Dataset creation will include README from templates/datasets/")
|
160 |
+
print("3. Both scripts will properly upload all required files")
|
161 |
+
return 0
|
162 |
+
else:
|
163 |
+
print("β Some tests failed. Please fix the issues before deployment.")
|
164 |
+
return 1
|
165 |
+
|
166 |
+
if __name__ == "__main__":
|
167 |
+
exit(main())
|
test_formatting_fix.py β tests/test_formatting_fix.py
RENAMED
File without changes
|
tests/test_pipeline.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Quick test script to verify pipeline components
|
4 |
+
"""
|
5 |
+
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
# Add project root to path
|
11 |
+
project_root = Path(__file__).parent
|
12 |
+
sys.path.insert(0, str(project_root))
|
13 |
+
|
14 |
+
def test_imports():
|
15 |
+
"""Test that all required modules can be imported"""
|
16 |
+
print("π Testing imports...")
|
17 |
+
|
18 |
+
try:
|
19 |
+
from src.config import get_config
|
20 |
+
print("β
src.config imported successfully")
|
21 |
+
except ImportError as e:
|
22 |
+
print(f"β Failed to import src.config: {e}")
|
23 |
+
return False
|
24 |
+
|
25 |
+
try:
|
26 |
+
from src.model import SmolLM3Model
|
27 |
+
print("β
src.model imported successfully")
|
28 |
+
except ImportError as e:
|
29 |
+
print(f"β Failed to import src.model: {e}")
|
30 |
+
return False
|
31 |
+
|
32 |
+
try:
|
33 |
+
from src.data import SmolLM3Dataset
|
34 |
+
print("β
src.data imported successfully")
|
35 |
+
except ImportError as e:
|
36 |
+
print(f"β Failed to import src.data: {e}")
|
37 |
+
return False
|
38 |
+
|
39 |
+
try:
|
40 |
+
from src.trainer import SmolLM3Trainer
|
41 |
+
print("β
src.trainer imported successfully")
|
42 |
+
except ImportError as e:
|
43 |
+
print(f"β Failed to import src.trainer: {e}")
|
44 |
+
return False
|
45 |
+
|
46 |
+
try:
|
47 |
+
from src.monitoring import create_monitor_from_config
|
48 |
+
print("β
src.monitoring imported successfully")
|
49 |
+
except ImportError as e:
|
50 |
+
print(f"β Failed to import src.monitoring: {e}")
|
51 |
+
return False
|
52 |
+
|
53 |
+
return True
|
54 |
+
|
55 |
+
def test_config_loading():
|
56 |
+
"""Test that configuration files can be loaded"""
|
57 |
+
print("\nπ Testing config loading...")
|
58 |
+
|
59 |
+
config_files = [
|
60 |
+
"config/train_smollm3_h100_lightweight.py",
|
61 |
+
"config/train_smollm3_openhermes_fr_a100_large.py",
|
62 |
+
"config/train_smollm3.py"
|
63 |
+
]
|
64 |
+
|
65 |
+
for config_file in config_files:
|
66 |
+
if os.path.exists(config_file):
|
67 |
+
try:
|
68 |
+
config = get_config(config_file)
|
69 |
+
print(f"β
{config_file} loaded successfully")
|
70 |
+
print(f" Model: {config.model_name}")
|
71 |
+
print(f" Batch size: {config.batch_size}")
|
72 |
+
if hasattr(config, 'sample_size') and config.sample_size:
|
73 |
+
print(f" Sample size: {config.sample_size}")
|
74 |
+
except Exception as e:
|
75 |
+
print(f"β Failed to load {config_file}: {e}")
|
76 |
+
return False
|
77 |
+
else:
|
78 |
+
print(f"β οΈ {config_file} not found")
|
79 |
+
|
80 |
+
return True
|
81 |
+
|
82 |
+
def test_dataset_sampling():
|
83 |
+
"""Test dataset sampling functionality"""
|
84 |
+
print("\nπ Testing dataset sampling...")
|
85 |
+
|
86 |
+
try:
|
87 |
+
from datasets import load_dataset
|
88 |
+
from transformers import AutoTokenizer
|
89 |
+
|
90 |
+
# Load a small test dataset
|
91 |
+
print("Loading test dataset...")
|
92 |
+
dataset = load_dataset("legmlai/openhermes-fr", split="train[:100]")
|
93 |
+
print(f"Loaded {len(dataset)} samples")
|
94 |
+
|
95 |
+
# Test tokenizer
|
96 |
+
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM3-3B")
|
97 |
+
print("β
Tokenizer loaded successfully")
|
98 |
+
|
99 |
+
# Test dataset with sampling
|
100 |
+
from src.data import SmolLM3Dataset
|
101 |
+
|
102 |
+
dataset_handler = SmolLM3Dataset(
|
103 |
+
data_path="legmlai/openhermes-fr",
|
104 |
+
tokenizer=tokenizer,
|
105 |
+
max_seq_length=1024,
|
106 |
+
sample_size=50, # Sample 50 from the 100 we loaded
|
107 |
+
sample_seed=42
|
108 |
+
)
|
109 |
+
|
110 |
+
train_dataset = dataset_handler.get_train_dataset()
|
111 |
+
print(f"β
Dataset sampling works: {len(train_dataset)} samples")
|
112 |
+
|
113 |
+
return True
|
114 |
+
|
115 |
+
except Exception as e:
|
116 |
+
print(f"β Dataset sampling test failed: {e}")
|
117 |
+
return False
|
118 |
+
|
119 |
+
def main():
|
120 |
+
"""Run all tests"""
|
121 |
+
print("π Testing SmolLM3 Pipeline Components")
|
122 |
+
print("=" * 50)
|
123 |
+
|
124 |
+
tests = [
|
125 |
+
test_imports,
|
126 |
+
test_config_loading,
|
127 |
+
test_dataset_sampling
|
128 |
+
]
|
129 |
+
|
130 |
+
passed = 0
|
131 |
+
total = len(tests)
|
132 |
+
|
133 |
+
for test in tests:
|
134 |
+
if test():
|
135 |
+
passed += 1
|
136 |
+
else:
|
137 |
+
print(f"β Test failed: {test.__name__}")
|
138 |
+
|
139 |
+
print(f"\n{'='*50}")
|
140 |
+
print(f"π Test Results: {passed}/{total} tests passed")
|
141 |
+
|
142 |
+
if passed == total:
|
143 |
+
print("π All tests passed! Pipeline is ready to run.")
|
144 |
+
return 0
|
145 |
+
else:
|
146 |
+
print("β Some tests failed. Please fix the issues before running the pipeline.")
|
147 |
+
return 1
|
148 |
+
|
149 |
+
if __name__ == "__main__":
|
150 |
+
exit(main())
|
tests/test_readme_template.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Test script to verify README template replacement works correctly
|
4 |
+
"""
|
5 |
+
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
# Add project root to path
|
11 |
+
project_root = Path(__file__).parent
|
12 |
+
sys.path.insert(0, str(project_root))
|
13 |
+
|
14 |
+
def test_readme_template():
|
15 |
+
"""Test README template replacement"""
|
16 |
+
print("π Testing README template replacement...")
|
17 |
+
|
18 |
+
try:
|
19 |
+
# Get template path
|
20 |
+
templates_dir = project_root / "templates" / "spaces"
|
21 |
+
readme_template_path = templates_dir / "README.md"
|
22 |
+
|
23 |
+
if not readme_template_path.exists():
|
24 |
+
print(f"β README template not found: {readme_template_path}")
|
25 |
+
return False
|
26 |
+
|
27 |
+
# Read template
|
28 |
+
with open(readme_template_path, 'r', encoding='utf-8') as f:
|
29 |
+
template_content = f.read()
|
30 |
+
|
31 |
+
print(f"β
README template loaded ({len(template_content)} characters)")
|
32 |
+
|
33 |
+
# Test placeholder replacement
|
34 |
+
test_space_url = "https://huggingface.co/spaces/test-user/test-space"
|
35 |
+
replaced_content = template_content.replace("{SPACE_URL}", test_space_url)
|
36 |
+
|
37 |
+
if "{SPACE_URL}" in replaced_content:
|
38 |
+
print("β Placeholder replacement failed")
|
39 |
+
return False
|
40 |
+
|
41 |
+
if test_space_url not in replaced_content:
|
42 |
+
print("β Space URL not found in replaced content")
|
43 |
+
return False
|
44 |
+
|
45 |
+
print("β
Placeholder replacement works correctly")
|
46 |
+
print(f"β
Space URL: {test_space_url}")
|
47 |
+
|
48 |
+
return True
|
49 |
+
|
50 |
+
except Exception as e:
|
51 |
+
print(f"β README template test failed: {e}")
|
52 |
+
return False
|
53 |
+
|
54 |
+
def test_deployment_readme():
    """Verify that the deployment script can use the README template.

    Imports TrackioSpaceDeployer (catching syntax/dependency errors),
    exercises its constructor, and checks that the README template
    exists and contains the ``{SPACE_URL}`` placeholder.

    Returns:
        bool: True if the template exists (placeholder or not),
        False on a missing template or any exception.
    """
    print("\n🔍 Testing deployment script README usage...")

    try:
        from scripts.trackio_tonic.deploy_trackio_space import TrackioSpaceDeployer

        # Instantiating checks the constructor signature; the instance
        # itself is not used further in this test.
        TrackioSpaceDeployer("test-space", "test-user", "test-token")

        # This file lives in tests/; the templates directory is at the
        # repository root (avoid shadowing the module-level project_root).
        repo_root = Path(__file__).parent.parent
        templates_dir = repo_root / "templates" / "spaces"
        readme_template_path = templates_dir / "README.md"

        if not readme_template_path.exists():
            print(f"❌ README template missing: {readme_template_path}")
            return False

        print(f"✅ README template exists: {readme_template_path}")

        with open(readme_template_path, 'r', encoding='utf-8') as f:
            content = f.read()

        if "{SPACE_URL}" in content:
            print("✅ Template contains placeholder")
        else:
            # Missing placeholder is a warning, not a hard failure.
            print("⚠️ Template missing placeholder")

        return True

    except Exception as e:
        print(f"❌ Deployment README test failed: {e}")
        return False
|
88 |
+
|
89 |
+
def main():
    """Run all README template tests and report an exit code.

    Returns:
        int: 0 if every test passed, 1 otherwise.
    """
    print("🚀 Testing README Template System")
    print("=" * 50)

    tests = [
        test_readme_template,
        test_deployment_readme,
    ]

    passed = 0
    total = len(tests)

    for test in tests:
        if test():
            passed += 1
        else:
            print(f"❌ Test failed: {test.__name__}")

    print(f"\n{'='*50}")
    print(f"📊 Test Results: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 All tests passed! README template system is working correctly.")
        print("\n📋 Template workflow:")
        print("1. README template is read from templates/spaces/README.md")
        print("2. {SPACE_URL} placeholder is replaced with actual space URL")
        print("3. Customized README is written to the space")
        return 0
    else:
        print("❌ Some tests failed. Please fix the issues before deployment.")
        return 1
|
121 |
+
|
122 |
+
if __name__ == "__main__":
    # sys.exit works even when the site-provided `exit` builtin is absent
    # (e.g. when run with python -S); `sys` is imported at module level.
    sys.exit(main())
|
tests/test_simple_pipeline.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
"""
Simple test script for the simplified pipeline approach.
"""

import os
import sys
from pathlib import Path

# This file lives in tests/, so the project root is one level up.
# (Using .parent alone would point at tests/ and break config.* imports.)
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))  # make scripts.* and config.* importable
|
13 |
+
|
14 |
+
def test_simple_training_script():
    """Check that the training entry point and H100 config are importable.

    Importing alone catches syntax errors and missing dependencies; a few
    key config values are printed for manual inspection.

    Returns:
        bool: True on success, False if any import or attribute access fails.
    """
    print("🔍 Testing simplified training script...")

    try:
        from scripts.training.train import main as train_main
        print("✅ Training script imported successfully")

        from config.train_smollm3_h100_lightweight import config as h100_config
        print("✅ H100 lightweight config loaded successfully")
        print(f"   Model: {h100_config.model_name}")
        print(f"   Batch size: {h100_config.batch_size}")
        print(f"   Sample size: {h100_config.sample_size}")

        return True

    except Exception as e:
        print(f"❌ Training script test failed: {e}")
        return False
|
35 |
+
|
36 |
+
def test_config_files():
    """Check that all required training config files exist.

    Unlike an early-return loop, this reports *every* missing file so a
    single run shows the full set of problems.

    Returns:
        bool: True if every config file is present, False otherwise.
    """
    print("\n🔍 Testing config files...")

    config_files = [
        "config/train_smollm3_h100_lightweight.py",
        "config/train_smollm3_openhermes_fr_a100_large.py",
        "config/train_smollm3_openhermes_fr_a100_multiple_passes.py",
    ]

    all_present = True
    for config_file in config_files:
        if os.path.exists(config_file):
            print(f"✅ {config_file}")
        else:
            print(f"❌ {config_file} not found")
            all_present = False

    return all_present
|
54 |
+
|
55 |
+
def test_scripts():
    """Check that all required pipeline scripts exist.

    Reports *every* missing script (instead of stopping at the first) so
    a single run shows the full set of problems.

    Returns:
        bool: True if every script is present, False otherwise.
    """
    print("\n🔍 Testing scripts...")

    script_files = [
        "scripts/training/train.py",
        "scripts/trackio_tonic/deploy_trackio_space.py",
        "scripts/trackio_tonic/configure_trackio.py",
        "scripts/dataset_tonic/setup_hf_dataset.py",
        "scripts/model_tonic/push_to_huggingface.py",
    ]

    all_present = True
    for script_file in script_files:
        if os.path.exists(script_file):
            print(f"✅ {script_file}")
        else:
            print(f"❌ {script_file} not found")
            all_present = False

    return all_present
|
75 |
+
|
76 |
+
def test_launch_script():
    """Check that launch.sh exists and warn if it is not executable.

    A missing execute bit is only a warning (the user can chmod it);
    only a missing file fails the test.

    Returns:
        bool: True if launch.sh exists, False otherwise.
    """
    print("\n🔍 Testing launch script...")

    launch_script = "launch.sh"
    if not os.path.exists(launch_script):
        print(f"❌ {launch_script} not found")
        return False

    print(f"✅ {launch_script} exists")

    if os.access(launch_script, os.X_OK):
        print(f"✅ {launch_script} is executable")
    else:
        print(f"⚠️ {launch_script} is not executable (run: chmod +x launch.sh)")

    return True
|
94 |
+
|
95 |
+
def main():
    """Run all simplified-pipeline tests and report an exit code.

    Returns:
        int: 0 if every test passed, 1 otherwise.
    """
    print("🚀 Testing Simplified SmolLM3 Pipeline")
    print("=" * 50)

    tests = [
        test_simple_training_script,
        test_config_files,
        test_scripts,
        test_launch_script,
    ]

    passed = 0
    total = len(tests)

    for test in tests:
        if test():
            passed += 1
        else:
            print(f"❌ Test failed: {test.__name__}")

    print(f"\n{'='*50}")
    print(f"📊 Test Results: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 All tests passed! Simplified pipeline is ready to run.")
        print("\n📋 To run the pipeline:")
        print("1. chmod +x launch.sh")
        print("2. ./launch.sh")
        return 0
    else:
        print("❌ Some tests failed. Please fix the issues before running the pipeline.")
        return 1
|
128 |
+
|
129 |
+
if __name__ == "__main__":
    # sys.exit works even when the site-provided `exit` builtin is absent
    # (e.g. when run with python -S); `sys` is imported at module level.
    sys.exit(main())
|