Tonic committed on
Commit
eb9e91f
·
1 Parent(s): c23e2f5

hide all tokens in logs, never persist to disk, remove max_seq_length from config, add to trainer

Browse files
launch.sh CHANGED
@@ -996,7 +996,6 @@ echo "================================"
996
  print_info "Setting up Hugging Face token for Python API..."
997
  print_status "HF token configured for Python API usage"
998
  print_info "Username: $HF_USERNAME (auto-detected from token)"
999
- print_info "Token available in environment: ${HF_TOKEN:0:10}...${HF_TOKEN: -4}"
1000
 
1001
  # Verify token is available in the virtual environment
1002
  print_info "Verifying token availability in virtual environment..."
 
996
  print_info "Setting up Hugging Face token for Python API..."
997
  print_status "HF token configured for Python API usage"
998
  print_info "Username: $HF_USERNAME (auto-detected from token)"
 
999
 
1000
  # Verify token is available in the virtual environment
1001
  print_info "Verifying token availability in virtual environment..."
scripts/deploy_demo_space.py CHANGED
@@ -451,7 +451,7 @@ Simply start chatting with the model using the interface below!
451
  def _manual_secret_setup(self) -> bool:
452
  """Fallback method for manual secret setup"""
453
  logger.info("πŸ“ Manual Space Secrets Configuration:")
454
- logger.info(f" HF_TOKEN={self.hf_token}")
455
  logger.info(f" HF_MODEL_ID={self.model_id}")
456
  if self.subfolder and self.subfolder.strip():
457
  logger.info(f" MODEL_SUBFOLDER={self.subfolder}")
@@ -470,7 +470,7 @@ Simply start chatting with the model using the interface below!
470
  logger.info("2. Navigate to the 'Repository secrets' section")
471
  logger.info("3. Add the following secrets:")
472
  logger.info(f" Name: HF_TOKEN")
473
- logger.info(f" Value: {self.hf_token}")
474
  logger.info(f" Name: HF_MODEL_ID")
475
  logger.info(f" Value: {self.model_id}")
476
  if self.subfolder and self.subfolder.strip():
 
451
  def _manual_secret_setup(self) -> bool:
452
  """Fallback method for manual secret setup"""
453
  logger.info("πŸ“ Manual Space Secrets Configuration:")
454
+ logger.info(f" HF_TOKEN=<hidden>")
455
  logger.info(f" HF_MODEL_ID={self.model_id}")
456
  if self.subfolder and self.subfolder.strip():
457
  logger.info(f" MODEL_SUBFOLDER={self.subfolder}")
 
470
  logger.info("2. Navigate to the 'Repository secrets' section")
471
  logger.info("3. Add the following secrets:")
472
  logger.info(f" Name: HF_TOKEN")
473
+ logger.info(f" Value: <your token>")
474
  logger.info(f" Name: HF_MODEL_ID")
475
  logger.info(f" Value: {self.model_id}")
476
  if self.subfolder and self.subfolder.strip():
scripts/trackio_tonic/configure_trackio.py CHANGED
@@ -99,8 +99,9 @@ def configure_trackio():
99
  dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', f'{username}/trackio-experiments')
100
 
101
  # Current configuration
 
102
  current_config = {
103
- 'HF_TOKEN': hf_token or 'Not set',
104
  'TRACKIO_DATASET_REPO': dataset_repo,
105
  'SPACE_ID': os.environ.get('SPACE_ID', 'Not set'),
106
  'TRACKIO_URL': os.environ.get('TRACKIO_URL', 'Not set')
@@ -139,8 +140,8 @@ def configure_trackio():
139
  print(f"📊 Dataset Repository: {dataset_repo}")
140
 
141
  # Test dataset access if token is available
142
- test_token = current_config['HF_TOKEN']
143
- if test_token != 'Not set':
144
  print("\n🧪 Testing Dataset Access...")
145
  try:
146
  from datasets import load_dataset
@@ -193,8 +194,9 @@ def configure_trackio():
193
 
194
  # Generate configuration file
195
  config_file = "trackio_config.json"
 
196
  config_data = {
197
- 'hf_token': current_config['HF_TOKEN'],
198
  'dataset_repo': current_config['TRACKIO_DATASET_REPO'],
199
  'space_id': current_config['SPACE_ID'],
200
  'trackio_url': current_config['TRACKIO_URL'],
@@ -211,7 +213,7 @@ def configure_trackio():
211
  # Show environment variable commands
212
  print("\nπŸ“ Environment Variables for HF Space:")
213
  print("=" * 50)
214
- print(f"HF_TOKEN={current_config['HF_TOKEN']}")
215
  print(f"TRACKIO_DATASET_REPO={current_config['TRACKIO_DATASET_REPO']}")
216
  if current_config['TRACKIO_URL'] != 'Not set':
217
  print(f"TRACKIO_URL={current_config['TRACKIO_URL']}")
 
99
  dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', f'{username}/trackio-experiments')
100
 
101
  # Current configuration
102
+ # Never expose raw tokens in logs; only track presence
103
  current_config = {
104
+ 'HF_TOKEN': 'Set' if hf_token else 'Not set',
105
  'TRACKIO_DATASET_REPO': dataset_repo,
106
  'SPACE_ID': os.environ.get('SPACE_ID', 'Not set'),
107
  'TRACKIO_URL': os.environ.get('TRACKIO_URL', 'Not set')
 
140
  print(f"📊 Dataset Repository: {dataset_repo}")
141
 
142
  # Test dataset access if token is available
143
+ test_token = hf_token
144
+ if test_token:
145
  print("\n🧪 Testing Dataset Access...")
146
  try:
147
  from datasets import load_dataset
 
194
 
195
  # Generate configuration file
196
  config_file = "trackio_config.json"
197
+ # Do not persist raw tokens to disk; store only presence flag
198
  config_data = {
199
+ 'hf_token_set': bool(hf_token),
200
  'dataset_repo': current_config['TRACKIO_DATASET_REPO'],
201
  'space_id': current_config['SPACE_ID'],
202
  'trackio_url': current_config['TRACKIO_URL'],
 
213
  # Show environment variable commands
214
  print("\nπŸ“ Environment Variables for HF Space:")
215
  print("=" * 50)
216
+ print(f"HF_TOKEN={'Set' if hf_token else 'Not set'}")
217
  print(f"TRACKIO_DATASET_REPO={current_config['TRACKIO_DATASET_REPO']}")
218
  if current_config['TRACKIO_URL'] != 'Not set':
219
  print(f"TRACKIO_URL={current_config['TRACKIO_URL']}")
scripts/trackio_tonic/deploy_trackio_space.py CHANGED
@@ -381,10 +381,9 @@ class TrackioSpaceDeployer:
381
  """Fallback method for manual secret setup"""
382
  print("πŸ“ Manual Space Secrets Configuration:")
383
 
384
- # Use the provided token as HF_TOKEN
385
  hf_token = self.token
386
-
387
- print(f" HF_TOKEN={hf_token}")
388
 
389
  dataset_repo = self.dataset_repo or f"{self.username}/trackio-experiments"
390
  print(f" TRACKIO_DATASET_REPO={dataset_repo}")
@@ -395,7 +394,7 @@ class TrackioSpaceDeployer:
395
  print("2. Navigate to the 'Repository secrets' section")
396
  print("3. Add the following secrets:")
397
  print(f" Name: HF_TOKEN")
398
- print(f" Value: {hf_token}")
399
  print(f" Name: TRACKIO_DATASET_REPO")
400
  print(f" Value: {dataset_repo}")
401
  print(f" Name: TRACKIO_URL")
@@ -485,7 +484,7 @@ def main():
485
 
486
  print(f"Using provided arguments:")
487
  print(f" Space name: {space_name}")
488
- print(f" Token: {'*' * 10}...{token[-4:]}")
489
  print(f" Git email: {git_email or 'default'}")
490
  print(f" Git name: {git_name or 'default'}")
491
  print(f" Dataset repo: {dataset_repo or 'default'}")
 
381
  """Fallback method for manual secret setup"""
382
  print("πŸ“ Manual Space Secrets Configuration:")
383
 
384
+ # Use the provided token as HF_TOKEN, but never display it
385
  hf_token = self.token
386
+ print(f" HF_TOKEN={'*' * 10}...hidden")
 
387
 
388
  dataset_repo = self.dataset_repo or f"{self.username}/trackio-experiments"
389
  print(f" TRACKIO_DATASET_REPO={dataset_repo}")
 
394
  print("2. Navigate to the 'Repository secrets' section")
395
  print("3. Add the following secrets:")
396
  print(f" Name: HF_TOKEN")
397
+ print(f" Value: <your token>")
398
  print(f" Name: TRACKIO_DATASET_REPO")
399
  print(f" Value: {dataset_repo}")
400
  print(f" Name: TRACKIO_URL")
 
484
 
485
  print(f"Using provided arguments:")
486
  print(f" Space name: {space_name}")
487
+ print(f" Token: <hidden>")
488
  print(f" Git email: {git_email or 'default'}")
489
  print(f" Git name: {git_name or 'default'}")
490
  print(f" Dataset repo: {dataset_repo or 'default'}")
scripts/training/train_gpt_oss.py CHANGED
@@ -405,7 +405,6 @@ def create_sft_config(config, output_dir):
405
  gradient_accumulation_steps=gradient_accumulation_steps,
406
 
407
  # Model configuration
408
- max_seq_length=config.max_seq_length,
409
  gradient_checkpointing=getattr(config, 'use_gradient_checkpointing', True),
410
 
411
  # Mixed precision
@@ -506,6 +505,8 @@ def train_gpt_oss(config_path, experiment_name, output_dir, trackio_url, trainer
506
  args=sft_config,
507
  train_dataset=dataset,
508
  processing_class=tokenizer,
 
 
509
  )
510
 
511
  # Start training
 
405
  gradient_accumulation_steps=gradient_accumulation_steps,
406
 
407
  # Model configuration
 
408
  gradient_checkpointing=getattr(config, 'use_gradient_checkpointing', True),
409
 
410
  # Mixed precision
 
505
  args=sft_config,
506
  train_dataset=dataset,
507
  processing_class=tokenizer,
508
+ dataset_text_field="text",
509
+ max_seq_length=getattr(config, 'max_seq_length', 2048),
510
  )
511
 
512
  # Start training