Commit: adds repo id based only on the repo name; adds version-robust SFTTrainer construction

launch.sh (CHANGED)

@@ -831,8 +831,11 @@ get_input "Experiment name" "smollm3_finetune_$(date +%Y%m%d_%H%M%S)" EXPERIMENT
 
 # Configure model repository name (customizable)
 print_info "Setting up model repository name..."
-…
-…
+# Ask only for short repo name; we'll prefix with username automatically
+DEFAULT_SHORT_REPO="smolfactory-$(date +%Y%m%d)"
+get_input "Model repository name (repo only, no username/)" "$DEFAULT_SHORT_REPO" REPO_SHORT
+# Build full repo id using detected username
+REPO_NAME="$HF_USERNAME/$REPO_SHORT"
 print_status "Model repository: $REPO_NAME"
 
 # Automatically create dataset repository
@@ -1311,10 +1314,10 @@ export HF_USERNAME="$HF_USERNAME"
     --hf-username "$HF_USERNAME" \
     --model-id "$DEMO_MODEL_ID" \
     --subfolder "$DEMO_SUBFOLDER" \
-    --space-name "${…}"
+    --space-name "${REPO_SHORT}-demo"
 
 if [ $? -eq 0 ]; then
-    DEMO_SPACE_URL="https://huggingface.co/spaces/$HF_USERNAME/${…}"
+    DEMO_SPACE_URL="https://huggingface.co/spaces/$HF_USERNAME/${REPO_SHORT}-demo"
     print_status "✅ Demo space deployed successfully: $DEMO_SPACE_URL"
 else
     print_warning "⚠️ Demo space deployment failed, but continuing with pipeline"
@@ -1385,7 +1388,7 @@ echo "📊 Trackio: $TRACKIO_URL"
 echo "📈 Experiment: $EXPERIMENT_NAME"
 echo "📋 Dataset: https://huggingface.co/datasets/$TRACKIO_DATASET_REPO"
 $(if [ "$DEPLOY_DEMO" = "y" ] || [ "$DEPLOY_DEMO" = "Y" ]; then
-    echo "🎮 Demo: https://huggingface.co/spaces/$HF_USERNAME/${…}"
+    echo "🎮 Demo: https://huggingface.co/spaces/$HF_USERNAME/${REPO_SHORT}-demo"
 fi)
 echo ""
 echo "📝 Summary report saved to: training_summary.md"
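The net effect in launch.sh is a single naming convention: the user types only a short repo name, and both the full model repo id and the demo Space id are derived from it. A minimal Python sketch of that convention (the helper name build_names is illustrative, not part of the repo):

    from datetime import datetime

    def build_names(hf_username: str, repo_short: str = "") -> dict:
        """Derive the model repo id and demo Space URL from a short repo name."""
        repo_short = repo_short or f"smolfactory-{datetime.now():%Y%m%d}"
        return {
            "REPO_NAME": f"{hf_username}/{repo_short}",
            "DEMO_SPACE_URL": f"https://huggingface.co/spaces/{hf_username}/{repo_short}-demo",
        }

    print(build_names("alice", "smolfactory-20250102"))
    # {'REPO_NAME': 'alice/smolfactory-20250102',
    #  'DEMO_SPACE_URL': 'https://huggingface.co/spaces/alice/smolfactory-20250102-demo'}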
scripts/deploy_demo_space.py (CHANGED)

@@ -42,9 +42,10 @@ class DemoSpaceDeployer:
                  demo_type: Optional[str] = None):
         self.hf_token = hf_token
         self.hf_username = hf_username
-        self.model_id = model_id
+        # Allow passing just a repo name without username and auto-prefix
+        self.model_id = model_id if "/" in model_id else f"{hf_username}/{model_id}"
         self.subfolder = subfolder
-        self.space_name = space_name or f"{model_id.split('/')[-1]}-demo"
+        self.space_name = space_name or f"{self.model_id.split('/')[-1]}-demo"
         self.space_id = f"{hf_username}/{self.space_name}"
         self.space_url = f"https://huggingface.co/spaces/{self.space_id}"
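The prefixing rule itself is a one-liner and easy to sanity-check outside the class; a standalone sketch of the same expression (the name qualify is hypothetical):

    def qualify(model_id: str, hf_username: str) -> str:
        # Same expression as DemoSpaceDeployer.__init__: prefix only when no namespace is present
        return model_id if "/" in model_id else f"{hf_username}/{model_id}"

    assert qualify("my-model", "alice") == "alice/my-model"
    assert qualify("org/my-model", "alice") == "org/my-model"  # already qualified: left unchanged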
scripts/model_tonic/push_gpt_oss_to_huggingface.py (CHANGED)

@@ -247,12 +247,35 @@ This model is licensed under the MIT License.
 
     return card_content
 
+def _resolve_repo_id(repo_name: str, hf_token: str) -> str:
+    """Resolve to username/repo if only repo name was provided."""
+    try:
+        if "/" in repo_name:
+            return repo_name
+        from huggingface_hub import HfApi
+        username = None
+        if hf_token:
+            try:
+                api = HfApi(token=hf_token)
+                info = api.whoami()
+                username = info.get("name") or info.get("username")
+            except Exception:
+                username = None
+        if not username:
+            username = os.getenv("HF_USERNAME")
+        if not username:
+            raise ValueError("Could not determine HF username. Set HF_USERNAME or pass username/repo.")
+        return f"{username}/{repo_name}"
+    except Exception:
+        return repo_name
+
 def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experiment_name, dataset_repo, author_name, model_description, training_config_type=None, model_name=None, dataset_name=None, batch_size=None, learning_rate=None, max_epochs=None, max_seq_length=None, trainer_type=None):
     """Push GPT-OSS model to Hugging Face Hub"""
 
     print("=== GPT-OSS Model Push Pipeline ===")
     print(f"Checkpoint: {checkpoint_path}")
-    print(f"Repository: {repo_name}")
+    full_repo_id = _resolve_repo_id(repo_name, hf_token)
+    print(f"Repository: {full_repo_id}")
     print(f"Experiment: {experiment_name}")
     print(f"Author: {author_name}")
 
@@ -276,7 +299,7 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
     # Create model card
     print("Creating model card...")
     model_card_content = create_gpt_oss_model_card(
-        model_name=repo_name,
+        model_name=full_repo_id,
         experiment_name=experiment_name,
         trackio_url=trackio_url,
         dataset_repo=dataset_repo,
@@ -297,18 +320,18 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
         f.write(model_card_content)
 
     # Push to Hugging Face Hub
-    print(f"Pushing model to: {repo_name}")
+    print(f"Pushing model to: {full_repo_id}")
 
     # Set HF token
     os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
 
     # Push using transformers
     from huggingface_hub import HfApi
-    api = HfApi()
+    api = HfApi(token=hf_token)
 
     # Create repository if it doesn't exist
     try:
-        api.create_repo(repo_name, private=False, exist_ok=True)
+        api.create_repo(full_repo_id, private=False, exist_ok=True)
     except Exception as e:
         print(f"Warning: Could not create repository: {e}")
 
@@ -316,12 +339,12 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
     print("Uploading model files...")
     api.upload_folder(
         folder_path=temp_output,
-        repo_id=repo_name,
+        repo_id=full_repo_id,
         repo_type="model"
     )
 
     print("✅ GPT-OSS model pushed successfully!")
-    print(f"Model URL: https://huggingface.co/{repo_name}")
+    print(f"Model URL: https://huggingface.co/{full_repo_id}")
 
     # Clean up
     import shutil
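Resolution order in _resolve_repo_id is: an explicit "username/repo" passes through unchanged, then the token's identity via HfApi.whoami(), then the HF_USERNAME environment variable. A hedged usage sketch, assuming the function is imported from this script and no token is supplied (so only the env fallback fires):

    import os
    os.environ["HF_USERNAME"] = "alice"  # illustrative value

    print(_resolve_repo_id("my-model", hf_token=None))      # -> "alice/my-model" (env fallback)
    print(_resolve_repo_id("org/my-model", hf_token=None))  # -> "org/my-model" (already qualified)

Note the outer except returns the raw repo_name on any failure, so a bad or unresolvable name surfaces later at create_repo rather than here; that keeps the pipeline moving but defers the error.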
scripts/model_tonic/push_to_huggingface.py (CHANGED)

@@ -73,6 +73,7 @@ class HuggingFacePusher:
         trainer_type: Optional[str] = None
     ):
         self.model_path = Path(model_path)
+        # Original user input (may be just the repo name without username)
         self.repo_name = repo_name
         self.token = token or hf_token or os.getenv('HF_TOKEN')
         self.private = private
@@ -101,6 +102,9 @@ class HuggingFacePusher:
         else:
             raise ImportError("huggingface_hub is required. Install with: pip install huggingface_hub")
 
+        # Resolve the full repo id (username/repo) if user only provided repo name
+        self.repo_id = self._resolve_repo_id(self.repo_name)
+
         # Initialize monitoring if available
         self.monitor = None
         if MONITORING_AVAILABLE:
@@ -112,25 +116,60 @@
                 dataset_repo=self.dataset_repo
             )
 
-        logger.info(f"Initialized HuggingFacePusher for {self.repo_name}")
+        logger.info(f"Initialized HuggingFacePusher for {self.repo_id}")
         logger.info(f"Dataset repository: {self.dataset_repo}")
+
+    def _resolve_repo_id(self, repo_name: str) -> str:
+        """Return a fully-qualified repo id in the form username/repo.
+
+        If the provided name already contains a '/', it is returned unchanged.
+        Otherwise, we attempt to derive the username from the authenticated token
+        or from the HF_USERNAME environment variable.
+        """
+        try:
+            if "/" in repo_name:
+                return repo_name
+
+            # Need a username. Prefer API whoami(), fallback to env HF_USERNAME
+            username: Optional[str] = None
+            if self.token:
+                try:
+                    user_info = self.api.whoami()
+                    username = user_info.get("name") or user_info.get("username")
+                except Exception:
+                    username = None
+
+            if not username:
+                username = os.getenv("HF_USERNAME")
+
+            if not username:
+                raise ValueError(
+                    "Username could not be determined. Provide a token or set HF_USERNAME, "
+                    "or pass a fully-qualified repo id 'username/repo'."
+                )
+
+            return f"{username}/{repo_name}"
+        except Exception as resolve_error:
+            logger.error(f"Failed to resolve full repo id for '{repo_name}': {resolve_error}")
+            # Fall back to provided value (may fail later at create/upload)
+            return repo_name
 
     def create_repository(self) -> bool:
         """Create the Hugging Face repository"""
         try:
-            logger.info(f"Creating repository: {self.repo_name}")
+            logger.info(f"Creating repository: {self.repo_id}")
 
             # Create repository with timeout handling
             try:
                 # Create repository
                 create_repo(
-                    repo_id=self.repo_name,
+                    repo_id=self.repo_id,
                     token=self.token,
                     private=self.private,
                     exist_ok=True
                 )
 
-                logger.info(f"✅ Repository created: https://huggingface.co/{self.repo_name}")
+                logger.info(f"✅ Repository created: https://huggingface.co/{self.repo_id}")
                 return True
 
             except Exception as e:
@@ -189,8 +228,8 @@ class HuggingFacePusher:
 
         # Update with actual values
         variables.update({
-            "repo_name": self.repo_name,
-            "model_name": self.repo_name.split('/')[-1],
+            "repo_name": self.repo_id,
+            "model_name": self.repo_id.split('/')[-1],
             "experiment_name": self.experiment_name or "model_push",
             "dataset_repo": self.dataset_repo,
             "author_name": self.author_name or "Model Author",
@@ -238,7 +277,7 @@ pipeline_tag: text-generation
 base_model: HuggingFaceTB/SmolLM3-3B
 ---
 
-# {self.repo_name.split('/')[-1]}
+# {self.repo_id.split('/')[-1]}
 
 This is a fine-tuned SmolLM3 model based on the HuggingFaceTB/SmolLM3-3B architecture.
 
@@ -269,8 +308,8 @@ This is a fine-tuned SmolLM3 model based on the HuggingFaceTB/SmolLM3-3B archite
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Load model and tokenizer
-model = AutoModelForCausalLM.from_pretrained("{self.repo_name}")
-tokenizer = AutoTokenizer.from_pretrained("{self.repo_name}")
+model = AutoModelForCausalLM.from_pretrained("{self.repo_id}")
+tokenizer = AutoTokenizer.from_pretrained("{self.repo_id}")
 
 # Generate text
 inputs = tokenizer("Hello, how are you?", return_tensors="pt")
@@ -346,7 +385,7 @@ This model is licensed under the Apache 2.0 License.
                 upload_file(
                     path_or_fileobj=str(file_path),
                     path_in_repo=remote_path,
-                    repo_id=self.repo_name,
+                    repo_id=self.repo_id,
                     token=self.token
                 )
                 logger.info(f"✅ Uploaded {relative_path}")
@@ -381,7 +420,7 @@ This model is licensed under the Apache 2.0 License.
                 upload_file(
                     path_or_fileobj=str(file_path),
                     path_in_repo=f"training_results/{file_name}",
-                    repo_id=self.repo_name,
+                    repo_id=self.repo_id,
                     token=self.token
                 )
 
@@ -397,7 +436,7 @@ This model is licensed under the Apache 2.0 License.
         try:
             logger.info("Creating README.md...")
 
-            readme_content = f"""# {self.repo_name.split('/')[-1]}
+            readme_content = f"""# {self.repo_id.split('/')[-1]}
 
 A fine-tuned SmolLM3 model for text generation tasks.
 
@@ -406,8 +445,8 @@ A fine-tuned SmolLM3 model for text generation tasks.
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-model = AutoModelForCausalLM.from_pretrained("{self.repo_name}")
-tokenizer = AutoTokenizer.from_pretrained("{self.repo_name}")
+model = AutoModelForCausalLM.from_pretrained("{self.repo_id}")
+tokenizer = AutoTokenizer.from_pretrained("{self.repo_id}")
 
 # Generate text
 text = "Hello, how are you?"
@@ -463,7 +502,7 @@ MIT License
                 path_or_fileobj=str(readme_path),
                 path_in_repo="README.md",
                 token=self.token,
-                repo_id=self.repo_name
+                repo_id=self.repo_id
             )
 
             # Clean up
@@ -483,7 +522,7 @@ MIT License
             # Log to Trackio
             self.monitor.log_metrics({
                 "push_action": action,
-                "repo_name": self.repo_name,
+                "repo_name": self.repo_id,
                 "model_size_gb": self._get_model_size(),
                 "dataset_repo": self.dataset_repo,
                 **details
@@ -492,7 +531,7 @@ MIT License
             # Log training summary
             self.monitor.log_training_summary({
                 "model_push": True,
-                "model_repo": self.repo_name,
+                "model_repo": self.repo_id,
                 "dataset_repo": self.dataset_repo,
                 "push_date": datetime.now().isoformat(),
                 **details
@@ -505,7 +544,7 @@ MIT License
     def push_model(self, training_config: Optional[Dict[str, Any]] = None,
                    results: Optional[Dict[str, Any]] = None) -> bool:
         """Complete model push process with HF Datasets integration"""
-        logger.info(f"🚀 Starting model push to {self.repo_name}")
+        logger.info(f"🚀 Starting model push to {self.repo_id}")
         logger.info(f"📊 Dataset repository: {self.dataset_repo}")
 
         # Validate model path
@@ -533,7 +572,7 @@ MIT License
             upload_file(
                 path_or_fileobj=str(model_card_path),
                 path_in_repo="README.md",
-                repo_id=self.repo_id,
+                repo_id=self.repo_id,
                 token=self.token
             )
         finally:
@@ -556,7 +595,7 @@ MIT License
             "results": results
         })
 
-        logger.info(f"🎉 Model successfully pushed to: https://huggingface.co/{self.repo_name}")
+        logger.info(f"🎉 Model successfully pushed to: https://huggingface.co/{self.repo_id}")
         logger.info(f"📊 Experiment data stored in: {self.dataset_repo}")
         return True
 
@@ -582,7 +621,7 @@ def parse_args():
 
     # Required arguments
     parser.add_argument('model_path', type=str, help='Path to trained model directory')
-    parser.add_argument('repo_name', type=str, help='Hugging Face repository name (username/repo-name)')
+    parser.add_argument('repo_name', type=str, help='Hugging Face repository name (repo-name). Username will be auto-detected from your token.')
 
     # Optional arguments
     parser.add_argument('--token', type=str, default=None, help='Hugging Face token')
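Because the method reads only self.token and self.api, it can be exercised with a stub client instead of a live one; a minimal test sketch, assuming HuggingFacePusher is importable (the stub names are illustrative):

    class _StubApi:
        def whoami(self):
            # Minimal shape of huggingface_hub's whoami() payload for this lookup
            return {"name": "alice"}

    class _StubPusher(HuggingFacePusher):
        def __init__(self):
            # Bypass the real __init__; wire only what _resolve_repo_id reads
            self.token = "hf_dummy"
            self.api = _StubApi()

    assert _StubPusher()._resolve_repo_id("my-model") == "alice/my-model"
    assert _StubPusher()._resolve_repo_id("org/my-model") == "org/my-model"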
scripts/training/train_gpt_oss.py (CHANGED)

@@ -537,16 +537,38 @@ def train_gpt_oss(config_path, experiment_name, output_dir, trackio_url, trainer
     # Create SFT configuration
     sft_config = create_sft_config(config, output_dir)
 
-    # Create trainer
+    # Create trainer with version-robust kwargs
     print("Creating SFT trainer...")
-    …
+    try:
+        sft_sig = inspect.signature(SFTTrainer.__init__)
+        sft_params = set(sft_sig.parameters.keys())
+    except Exception:
+        sft_params = {"model", "args", "train_dataset", "tokenizer", "dataset_text_field", "max_seq_length"}
+
+    sft_kwargs = {
+        "model": peft_model,
+        "args": sft_config,
+        "train_dataset": dataset,
+    }
+
+    # Prefer passing tokenizer if supported; otherwise try processing_class
+    if "tokenizer" in sft_params:
+        sft_kwargs["tokenizer"] = tokenizer
+    elif "processing_class" in sft_params:
+        sft_kwargs["processing_class"] = tokenizer
+
+    # Pass dataset text field if supported (we produced a 'text' column)
+    if "dataset_text_field" in sft_params:
+        sft_kwargs["dataset_text_field"] = "text"
+
+    # Pass max sequence length if supported
+    if "max_seq_length" in sft_params:
+        sft_kwargs["max_seq_length"] = getattr(config, 'max_seq_length', 2048)
+
+    # Remove any None values
+    sft_kwargs = {k: v for k, v in sft_kwargs.items() if v is not None}
+
+    trainer = SFTTrainer(**sft_kwargs)
 
     # Start training
     print("Starting GPT-OSS training...")