Tonic committed on
Commit
665844a
·
1 Parent(s): c346dad

adds repo id based only on repo name; adds version-robust SFTTrainer

Browse files
launch.sh CHANGED
@@ -831,8 +831,11 @@ get_input "Experiment name" "smollm3_finetune_$(date +%Y%m%d_%H%M%S)" EXPERIMENT
831
 
832
  # Configure model repository name (customizable)
833
  print_info "Setting up model repository name..."
834
- DEFAULT_REPO_NAME="$HF_USERNAME/smolfactory-$(date +%Y%m%d)"
835
- get_input "Model repository name (Hugging Face format: username/repo)" "$DEFAULT_REPO_NAME" REPO_NAME
 
 
 
836
  print_status "Model repository: $REPO_NAME"
837
 
838
  # Automatically create dataset repository
@@ -1311,10 +1314,10 @@ export HF_USERNAME="$HF_USERNAME"
1311
  --hf-username "$HF_USERNAME" \
1312
  --model-id "$DEMO_MODEL_ID" \
1313
  --subfolder "$DEMO_SUBFOLDER" \
1314
- --space-name "${REPO_NAME}-demo"
1315
 
1316
  if [ $? -eq 0 ]; then
1317
- DEMO_SPACE_URL="https://huggingface.co/spaces/$HF_USERNAME/${REPO_NAME}-demo"
1318
  print_status "โœ… Demo space deployed successfully: $DEMO_SPACE_URL"
1319
  else
1320
  print_warning "โš ๏ธ Demo space deployment failed, but continuing with pipeline"
@@ -1385,7 +1388,7 @@ echo "๐Ÿ“ˆ Trackio: $TRACKIO_URL"
1385
  echo "๐Ÿ“‹ Experiment: $EXPERIMENT_NAME"
1386
  echo "๐Ÿ“Š Dataset: https://huggingface.co/datasets/$TRACKIO_DATASET_REPO"
1387
  $(if [ "$DEPLOY_DEMO" = "y" ] || [ "$DEPLOY_DEMO" = "Y" ]; then
1388
- echo "๐ŸŽฎ Demo: https://huggingface.co/spaces/$HF_USERNAME/${REPO_NAME}-demo"
1389
  fi)
1390
  echo ""
1391
  echo "๐Ÿ“‹ Summary report saved to: training_summary.md"
 
831
 
832
  # Configure model repository name (customizable)
833
  print_info "Setting up model repository name..."
834
+ # Ask only for short repo name; we'll prefix with username automatically
835
+ DEFAULT_SHORT_REPO="smolfactory-$(date +%Y%m%d)"
836
+ get_input "Model repository name (repo only, no username/)" "$DEFAULT_SHORT_REPO" REPO_SHORT
837
+ # Build full repo id using detected username
838
+ REPO_NAME="$HF_USERNAME/$REPO_SHORT"
839
  print_status "Model repository: $REPO_NAME"
840
 
841
  # Automatically create dataset repository
 
1314
  --hf-username "$HF_USERNAME" \
1315
  --model-id "$DEMO_MODEL_ID" \
1316
  --subfolder "$DEMO_SUBFOLDER" \
1317
+ --space-name "${REPO_SHORT}-demo"
1318
 
1319
  if [ $? -eq 0 ]; then
1320
+ DEMO_SPACE_URL="https://huggingface.co/spaces/$HF_USERNAME/${REPO_SHORT}-demo"
1321
  print_status "โœ… Demo space deployed successfully: $DEMO_SPACE_URL"
1322
  else
1323
  print_warning "โš ๏ธ Demo space deployment failed, but continuing with pipeline"
 
1388
  echo "๐Ÿ“‹ Experiment: $EXPERIMENT_NAME"
1389
  echo "๐Ÿ“Š Dataset: https://huggingface.co/datasets/$TRACKIO_DATASET_REPO"
1390
  $(if [ "$DEPLOY_DEMO" = "y" ] || [ "$DEPLOY_DEMO" = "Y" ]; then
1391
+ echo "๐ŸŽฎ Demo: https://huggingface.co/spaces/$HF_USERNAME/${REPO_SHORT}-demo"
1392
  fi)
1393
  echo ""
1394
  echo "๐Ÿ“‹ Summary report saved to: training_summary.md"
scripts/deploy_demo_space.py CHANGED
@@ -42,9 +42,10 @@ class DemoSpaceDeployer:
42
  demo_type: Optional[str] = None):
43
  self.hf_token = hf_token
44
  self.hf_username = hf_username
45
- self.model_id = model_id
 
46
  self.subfolder = subfolder
47
- self.space_name = space_name or f"{model_id.split('/')[-1]}-demo"
48
  self.space_id = f"{hf_username}/{self.space_name}"
49
  self.space_url = f"https://huggingface.co/spaces/{self.space_id}"
50
 
 
42
  demo_type: Optional[str] = None):
43
  self.hf_token = hf_token
44
  self.hf_username = hf_username
45
+ # Allow passing just a repo name without username and auto-prefix
46
+ self.model_id = model_id if "/" in model_id else f"{hf_username}/{model_id}"
47
  self.subfolder = subfolder
48
+ self.space_name = space_name or f"{self.model_id.split('/')[-1]}-demo"
49
  self.space_id = f"{hf_username}/{self.space_name}"
50
  self.space_url = f"https://huggingface.co/spaces/{self.space_id}"
51
 
scripts/model_tonic/push_gpt_oss_to_huggingface.py CHANGED
@@ -247,12 +247,35 @@ This model is licensed under the MIT License.
247
 
248
  return card_content
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experiment_name, dataset_repo, author_name, model_description, training_config_type=None, model_name=None, dataset_name=None, batch_size=None, learning_rate=None, max_epochs=None, max_seq_length=None, trainer_type=None):
251
  """Push GPT-OSS model to Hugging Face Hub"""
252
 
253
  print("=== GPT-OSS Model Push Pipeline ===")
254
  print(f"Checkpoint: {checkpoint_path}")
255
- print(f"Repository: {repo_name}")
 
256
  print(f"Experiment: {experiment_name}")
257
  print(f"Author: {author_name}")
258
 
@@ -276,7 +299,7 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
276
  # Create model card
277
  print("Creating model card...")
278
  model_card_content = create_gpt_oss_model_card(
279
- model_name=repo_name,
280
  experiment_name=experiment_name,
281
  trackio_url=trackio_url,
282
  dataset_repo=dataset_repo,
@@ -297,18 +320,18 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
297
  f.write(model_card_content)
298
 
299
  # Push to Hugging Face Hub
300
- print(f"Pushing model to: {repo_name}")
301
 
302
  # Set HF token
303
  os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
304
 
305
  # Push using transformers
306
  from huggingface_hub import HfApi
307
- api = HfApi()
308
 
309
  # Create repository if it doesn't exist
310
  try:
311
- api.create_repo(repo_name, private=False, exist_ok=True)
312
  except Exception as e:
313
  print(f"Warning: Could not create repository: {e}")
314
 
@@ -316,12 +339,12 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
316
  print("Uploading model files...")
317
  api.upload_folder(
318
  folder_path=temp_output,
319
- repo_id=repo_name,
320
  repo_type="model"
321
  )
322
 
323
  print("โœ… GPT-OSS model pushed successfully!")
324
- print(f"Model URL: https://huggingface.co/{repo_name}")
325
 
326
  # Clean up
327
  import shutil
 
247
 
248
  return card_content
249
 
250
+ def _resolve_repo_id(repo_name: str, hf_token: str) -> str:
251
+ """Resolve to username/repo if only repo name was provided."""
252
+ try:
253
+ if "/" in repo_name:
254
+ return repo_name
255
+ from huggingface_hub import HfApi
256
+ username = None
257
+ if hf_token:
258
+ try:
259
+ api = HfApi(token=hf_token)
260
+ info = api.whoami()
261
+ username = info.get("name") or info.get("username")
262
+ except Exception:
263
+ username = None
264
+ if not username:
265
+ username = os.getenv("HF_USERNAME")
266
+ if not username:
267
+ raise ValueError("Could not determine HF username. Set HF_USERNAME or pass username/repo.")
268
+ return f"{username}/{repo_name}"
269
+ except Exception:
270
+ return repo_name
271
+
272
  def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experiment_name, dataset_repo, author_name, model_description, training_config_type=None, model_name=None, dataset_name=None, batch_size=None, learning_rate=None, max_epochs=None, max_seq_length=None, trainer_type=None):
273
  """Push GPT-OSS model to Hugging Face Hub"""
274
 
275
  print("=== GPT-OSS Model Push Pipeline ===")
276
  print(f"Checkpoint: {checkpoint_path}")
277
+ full_repo_id = _resolve_repo_id(repo_name, hf_token)
278
+ print(f"Repository: {full_repo_id}")
279
  print(f"Experiment: {experiment_name}")
280
  print(f"Author: {author_name}")
281
 
 
299
  # Create model card
300
  print("Creating model card...")
301
  model_card_content = create_gpt_oss_model_card(
302
+ model_name=full_repo_id,
303
  experiment_name=experiment_name,
304
  trackio_url=trackio_url,
305
  dataset_repo=dataset_repo,
 
320
  f.write(model_card_content)
321
 
322
  # Push to Hugging Face Hub
323
+ print(f"Pushing model to: {full_repo_id}")
324
 
325
  # Set HF token
326
  os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
327
 
328
  # Push using transformers
329
  from huggingface_hub import HfApi
330
+ api = HfApi(token=hf_token)
331
 
332
  # Create repository if it doesn't exist
333
  try:
334
+ api.create_repo(full_repo_id, private=False, exist_ok=True)
335
  except Exception as e:
336
  print(f"Warning: Could not create repository: {e}")
337
 
 
339
  print("Uploading model files...")
340
  api.upload_folder(
341
  folder_path=temp_output,
342
+ repo_id=full_repo_id,
343
  repo_type="model"
344
  )
345
 
346
  print("โœ… GPT-OSS model pushed successfully!")
347
+ print(f"Model URL: https://huggingface.co/{full_repo_id}")
348
 
349
  # Clean up
350
  import shutil
scripts/model_tonic/push_to_huggingface.py CHANGED
@@ -73,6 +73,7 @@ class HuggingFacePusher:
73
  trainer_type: Optional[str] = None
74
  ):
75
  self.model_path = Path(model_path)
 
76
  self.repo_name = repo_name
77
  self.token = token or hf_token or os.getenv('HF_TOKEN')
78
  self.private = private
@@ -101,6 +102,9 @@ class HuggingFacePusher:
101
  else:
102
  raise ImportError("huggingface_hub is required. Install with: pip install huggingface_hub")
103
 
 
 
 
104
  # Initialize monitoring if available
105
  self.monitor = None
106
  if MONITORING_AVAILABLE:
@@ -112,25 +116,60 @@ class HuggingFacePusher:
112
  dataset_repo=self.dataset_repo
113
  )
114
 
115
- logger.info(f"Initialized HuggingFacePusher for {repo_name}")
116
  logger.info(f"Dataset repository: {self.dataset_repo}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  def create_repository(self) -> bool:
119
  """Create the Hugging Face repository"""
120
  try:
121
- logger.info(f"Creating repository: {self.repo_name}")
122
 
123
  # Create repository with timeout handling
124
  try:
125
  # Create repository
126
  create_repo(
127
- repo_id=self.repo_name,
128
  token=self.token,
129
  private=self.private,
130
  exist_ok=True
131
  )
132
 
133
- logger.info(f"โœ… Repository created: https://huggingface.co/{self.repo_name}")
134
  return True
135
 
136
  except Exception as e:
@@ -189,8 +228,8 @@ class HuggingFacePusher:
189
 
190
  # Update with actual values
191
  variables.update({
192
- "repo_name": self.repo_name,
193
- "model_name": self.repo_name.split('/')[-1],
194
  "experiment_name": self.experiment_name or "model_push",
195
  "dataset_repo": self.dataset_repo,
196
  "author_name": self.author_name or "Model Author",
@@ -238,7 +277,7 @@ pipeline_tag: text-generation
238
  base_model: HuggingFaceTB/SmolLM3-3B
239
  ---
240
 
241
- # {self.repo_name.split('/')[-1]}
242
 
243
  This is a fine-tuned SmolLM3 model based on the HuggingFaceTB/SmolLM3-3B architecture.
244
 
@@ -269,8 +308,8 @@ This is a fine-tuned SmolLM3 model based on the HuggingFaceTB/SmolLM3-3B archite
269
  from transformers import AutoModelForCausalLM, AutoTokenizer
270
 
271
  # Load model and tokenizer
272
- model = AutoModelForCausalLM.from_pretrained("{self.repo_name}")
273
- tokenizer = AutoTokenizer.from_pretrained("{self.repo_name}")
274
 
275
  # Generate text
276
  inputs = tokenizer("Hello, how are you?", return_tensors="pt")
@@ -346,7 +385,7 @@ This model is licensed under the Apache 2.0 License.
346
  upload_file(
347
  path_or_fileobj=str(file_path),
348
  path_in_repo=remote_path,
349
- repo_id=self.repo_name,
350
  token=self.token
351
  )
352
  logger.info(f"โœ… Uploaded {relative_path}")
@@ -381,7 +420,7 @@ This model is licensed under the Apache 2.0 License.
381
  upload_file(
382
  path_or_fileobj=str(file_path),
383
  path_in_repo=f"training_results/{file_name}",
384
- repo_id=self.repo_name,
385
  token=self.token
386
  )
387
 
@@ -397,7 +436,7 @@ This model is licensed under the Apache 2.0 License.
397
  try:
398
  logger.info("Creating README.md...")
399
 
400
- readme_content = f"""# {self.repo_name.split('/')[-1]}
401
 
402
  A fine-tuned SmolLM3 model for text generation tasks.
403
 
@@ -406,8 +445,8 @@ A fine-tuned SmolLM3 model for text generation tasks.
406
  ```python
407
  from transformers import AutoModelForCausalLM, AutoTokenizer
408
 
409
- model = AutoModelForCausalLM.from_pretrained("{self.repo_name}")
410
- tokenizer = AutoTokenizer.from_pretrained("{self.repo_name}")
411
 
412
  # Generate text
413
  text = "Hello, how are you?"
@@ -463,7 +502,7 @@ MIT License
463
  path_or_fileobj=str(readme_path),
464
  path_in_repo="README.md",
465
  token=self.token,
466
- repo_id=self.repo_name
467
  )
468
 
469
  # Clean up
@@ -483,7 +522,7 @@ MIT License
483
  # Log to Trackio
484
  self.monitor.log_metrics({
485
  "push_action": action,
486
- "repo_name": self.repo_name,
487
  "model_size_gb": self._get_model_size(),
488
  "dataset_repo": self.dataset_repo,
489
  **details
@@ -492,7 +531,7 @@ MIT License
492
  # Log training summary
493
  self.monitor.log_training_summary({
494
  "model_push": True,
495
- "model_repo": self.repo_name,
496
  "dataset_repo": self.dataset_repo,
497
  "push_date": datetime.now().isoformat(),
498
  **details
@@ -505,7 +544,7 @@ MIT License
505
  def push_model(self, training_config: Optional[Dict[str, Any]] = None,
506
  results: Optional[Dict[str, Any]] = None) -> bool:
507
  """Complete model push process with HF Datasets integration"""
508
- logger.info(f"๐Ÿš€ Starting model push to {self.repo_name}")
509
  logger.info(f"๐Ÿ“Š Dataset repository: {self.dataset_repo}")
510
 
511
  # Validate model path
@@ -533,7 +572,7 @@ MIT License
533
  upload_file(
534
  path_or_fileobj=str(model_card_path),
535
  path_in_repo="README.md",
536
- repo_id=self.repo_name,
537
  token=self.token
538
  )
539
  finally:
@@ -556,7 +595,7 @@ MIT License
556
  "results": results
557
  })
558
 
559
- logger.info(f"๐ŸŽ‰ Model successfully pushed to: https://huggingface.co/{self.repo_name}")
560
  logger.info(f"๐Ÿ“Š Experiment data stored in: {self.dataset_repo}")
561
  return True
562
 
@@ -582,7 +621,7 @@ def parse_args():
582
 
583
  # Required arguments
584
  parser.add_argument('model_path', type=str, help='Path to trained model directory')
585
- parser.add_argument('repo_name', type=str, help='Hugging Face repository name (username/repo-name)')
586
 
587
  # Optional arguments
588
  parser.add_argument('--token', type=str, default=None, help='Hugging Face token')
 
73
  trainer_type: Optional[str] = None
74
  ):
75
  self.model_path = Path(model_path)
76
+ # Original user input (may be just the repo name without username)
77
  self.repo_name = repo_name
78
  self.token = token or hf_token or os.getenv('HF_TOKEN')
79
  self.private = private
 
102
  else:
103
  raise ImportError("huggingface_hub is required. Install with: pip install huggingface_hub")
104
 
105
+ # Resolve the full repo id (username/repo) if user only provided repo name
106
+ self.repo_id = self._resolve_repo_id(self.repo_name)
107
+
108
  # Initialize monitoring if available
109
  self.monitor = None
110
  if MONITORING_AVAILABLE:
 
116
  dataset_repo=self.dataset_repo
117
  )
118
 
119
+ logger.info(f"Initialized HuggingFacePusher for {self.repo_id}")
120
  logger.info(f"Dataset repository: {self.dataset_repo}")
121
+
122
+ def _resolve_repo_id(self, repo_name: str) -> str:
123
+ """Return a fully-qualified repo id in the form username/repo.
124
+
125
+ If the provided name already contains a '/', it is returned unchanged.
126
+ Otherwise, we attempt to derive the username from the authenticated token
127
+ or from the HF_USERNAME environment variable.
128
+ """
129
+ try:
130
+ if "/" in repo_name:
131
+ return repo_name
132
+
133
+ # Need a username. Prefer API whoami(), fallback to env HF_USERNAME
134
+ username: Optional[str] = None
135
+ if self.token:
136
+ try:
137
+ user_info = self.api.whoami()
138
+ username = user_info.get("name") or user_info.get("username")
139
+ except Exception:
140
+ username = None
141
+
142
+ if not username:
143
+ username = os.getenv("HF_USERNAME")
144
+
145
+ if not username:
146
+ raise ValueError(
147
+ "Username could not be determined. Provide a token or set HF_USERNAME, "
148
+ "or pass a fully-qualified repo id 'username/repo'."
149
+ )
150
+
151
+ return f"{username}/{repo_name}"
152
+ except Exception as resolve_error:
153
+ logger.error(f"Failed to resolve full repo id for '{repo_name}': {resolve_error}")
154
+ # Fall back to provided value (may fail later at create/upload)
155
+ return repo_name
156
 
157
  def create_repository(self) -> bool:
158
  """Create the Hugging Face repository"""
159
  try:
160
+ logger.info(f"Creating repository: {self.repo_id}")
161
 
162
  # Create repository with timeout handling
163
  try:
164
  # Create repository
165
  create_repo(
166
+ repo_id=self.repo_id,
167
  token=self.token,
168
  private=self.private,
169
  exist_ok=True
170
  )
171
 
172
+ logger.info(f"โœ… Repository created: https://huggingface.co/{self.repo_id}")
173
  return True
174
 
175
  except Exception as e:
 
228
 
229
  # Update with actual values
230
  variables.update({
231
+ "repo_name": self.repo_id,
232
+ "model_name": self.repo_id.split('/')[-1],
233
  "experiment_name": self.experiment_name or "model_push",
234
  "dataset_repo": self.dataset_repo,
235
  "author_name": self.author_name or "Model Author",
 
277
  base_model: HuggingFaceTB/SmolLM3-3B
278
  ---
279
 
280
+ # {self.repo_id.split('/')[-1]}
281
 
282
  This is a fine-tuned SmolLM3 model based on the HuggingFaceTB/SmolLM3-3B architecture.
283
 
 
308
  from transformers import AutoModelForCausalLM, AutoTokenizer
309
 
310
  # Load model and tokenizer
311
+ model = AutoModelForCausalLM.from_pretrained("{self.repo_id}")
312
+ tokenizer = AutoTokenizer.from_pretrained("{self.repo_id}")
313
 
314
  # Generate text
315
  inputs = tokenizer("Hello, how are you?", return_tensors="pt")
 
385
  upload_file(
386
  path_or_fileobj=str(file_path),
387
  path_in_repo=remote_path,
388
+ repo_id=self.repo_id,
389
  token=self.token
390
  )
391
  logger.info(f"โœ… Uploaded {relative_path}")
 
420
  upload_file(
421
  path_or_fileobj=str(file_path),
422
  path_in_repo=f"training_results/{file_name}",
423
+ repo_id=self.repo_id,
424
  token=self.token
425
  )
426
 
 
436
  try:
437
  logger.info("Creating README.md...")
438
 
439
+ readme_content = f"""# {self.repo_id.split('/')[-1]}
440
 
441
  A fine-tuned SmolLM3 model for text generation tasks.
442
 
 
445
  ```python
446
  from transformers import AutoModelForCausalLM, AutoTokenizer
447
 
448
+ model = AutoModelForCausalLM.from_pretrained("{self.repo_id}")
449
+ tokenizer = AutoTokenizer.from_pretrained("{self.repo_id}")
450
 
451
  # Generate text
452
  text = "Hello, how are you?"
 
502
  path_or_fileobj=str(readme_path),
503
  path_in_repo="README.md",
504
  token=self.token,
505
+ repo_id=self.repo_id
506
  )
507
 
508
  # Clean up
 
522
  # Log to Trackio
523
  self.monitor.log_metrics({
524
  "push_action": action,
525
+ "repo_name": self.repo_id,
526
  "model_size_gb": self._get_model_size(),
527
  "dataset_repo": self.dataset_repo,
528
  **details
 
531
  # Log training summary
532
  self.monitor.log_training_summary({
533
  "model_push": True,
534
+ "model_repo": self.repo_id,
535
  "dataset_repo": self.dataset_repo,
536
  "push_date": datetime.now().isoformat(),
537
  **details
 
544
  def push_model(self, training_config: Optional[Dict[str, Any]] = None,
545
  results: Optional[Dict[str, Any]] = None) -> bool:
546
  """Complete model push process with HF Datasets integration"""
547
+ logger.info(f"๐Ÿš€ Starting model push to {self.repo_id}")
548
  logger.info(f"๐Ÿ“Š Dataset repository: {self.dataset_repo}")
549
 
550
  # Validate model path
 
572
  upload_file(
573
  path_or_fileobj=str(model_card_path),
574
  path_in_repo="README.md",
575
+ repo_id=self.repo_id,
576
  token=self.token
577
  )
578
  finally:
 
595
  "results": results
596
  })
597
 
598
+ logger.info(f"๐ŸŽ‰ Model successfully pushed to: https://huggingface.co/{self.repo_id}")
599
  logger.info(f"๐Ÿ“Š Experiment data stored in: {self.dataset_repo}")
600
  return True
601
 
 
621
 
622
  # Required arguments
623
  parser.add_argument('model_path', type=str, help='Path to trained model directory')
624
+ parser.add_argument('repo_name', type=str, help='Hugging Face repository name (repo-name). Username will be auto-detected from your token.')
625
 
626
  # Optional arguments
627
  parser.add_argument('--token', type=str, default=None, help='Hugging Face token')
scripts/training/train_gpt_oss.py CHANGED
@@ -537,16 +537,38 @@ def train_gpt_oss(config_path, experiment_name, output_dir, trackio_url, trainer
537
  # Create SFT configuration
538
  sft_config = create_sft_config(config, output_dir)
539
 
540
- # Create trainer
541
  print("Creating SFT trainer...")
542
- trainer = SFTTrainer(
543
- model=peft_model,
544
- args=sft_config,
545
- train_dataset=dataset,
546
- tokenizer=tokenizer,
547
- dataset_text_field="text",
548
- max_seq_length=getattr(config, 'max_seq_length', 2048),
549
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
 
551
  # Start training
552
  print("Starting GPT-OSS training...")
 
537
  # Create SFT configuration
538
  sft_config = create_sft_config(config, output_dir)
539
 
540
+ # Create trainer with version-robust kwargs
541
  print("Creating SFT trainer...")
542
+ try:
543
+ sft_sig = inspect.signature(SFTTrainer.__init__)
544
+ sft_params = set(sft_sig.parameters.keys())
545
+ except Exception:
546
+ sft_params = {"model", "args", "train_dataset", "tokenizer", "dataset_text_field", "max_seq_length"}
547
+
548
+ sft_kwargs = {
549
+ "model": peft_model,
550
+ "args": sft_config,
551
+ "train_dataset": dataset,
552
+ }
553
+
554
+ # Prefer passing tokenizer if supported; otherwise try processing_class
555
+ if "tokenizer" in sft_params:
556
+ sft_kwargs["tokenizer"] = tokenizer
557
+ elif "processing_class" in sft_params:
558
+ sft_kwargs["processing_class"] = tokenizer
559
+
560
+ # Pass dataset text field if supported (we produced a 'text' column)
561
+ if "dataset_text_field" in sft_params:
562
+ sft_kwargs["dataset_text_field"] = "text"
563
+
564
+ # Pass max sequence length if supported
565
+ if "max_seq_length" in sft_params:
566
+ sft_kwargs["max_seq_length"] = getattr(config, 'max_seq_length', 2048)
567
+
568
+ # Remove any None values
569
+ sft_kwargs = {k: v for k, v in sft_kwargs.items() if v is not None}
570
+
571
+ trainer = SFTTrainer(**sft_kwargs)
572
 
573
  # Start training
574
  print("Starting GPT-OSS training...")