Spaces: Tonic / SmolFactory

Running

App Files Community

Tonic committed 16 days ago

Commit

eb9e91f

1 Parent(s): c23e2f5

Hide all tokens in logs, never persist them to disk, remove max_seq_length from the SFT config, and pass it to the trainer instead

Browse files

Files changed (5) hide show

launch.sh +0 -1
scripts/deploy_demo_space.py +2 -2
scripts/trackio_tonic/configure_trackio.py +7 -5
scripts/trackio_tonic/deploy_trackio_space.py +4 -5
scripts/training/train_gpt_oss.py +2 -1

launch.sh CHANGED Viewed

@@ -996,7 +996,6 @@ echo "================================"
 print_info "Setting up Hugging Face token for Python API..."
 print_status "HF token configured for Python API usage"
 print_info "Username: $HF_USERNAME (auto-detected from token)"
-print_info "Token available in environment: ${HF_TOKEN:0:10}...${HF_TOKEN: -4}"
 # Verify token is available in the virtual environment
 print_info "Verifying token availability in virtual environment..."

 print_info "Setting up Hugging Face token for Python API..."
 print_status "HF token configured for Python API usage"
 print_info "Username: $HF_USERNAME (auto-detected from token)"
 # Verify token is available in the virtual environment
 print_info "Verifying token availability in virtual environment..."

scripts/deploy_demo_space.py CHANGED Viewed

@@ -451,7 +451,7 @@ Simply start chatting with the model using the interface below!
     def _manual_secret_setup(self) -> bool:
         """Fallback method for manual secret setup"""
         logger.info("📝 Manual Space Secrets Configuration:")
-        logger.info(f"   HF_TOKEN={self.hf_token}")
         logger.info(f"   HF_MODEL_ID={self.model_id}")
         if self.subfolder and self.subfolder.strip():
             logger.info(f"   MODEL_SUBFOLDER={self.subfolder}")
@@ -470,7 +470,7 @@ Simply start chatting with the model using the interface below!
         logger.info("2. Navigate to the 'Repository secrets' section")
         logger.info("3. Add the following secrets:")
         logger.info(f"   Name: HF_TOKEN")
-        logger.info(f"   Value: {self.hf_token}")
         logger.info(f"   Name: HF_MODEL_ID")
         logger.info(f"   Value: {self.model_id}")
         if self.subfolder and self.subfolder.strip():

     def _manual_secret_setup(self) -> bool:
         """Fallback method for manual secret setup"""
         logger.info("📝 Manual Space Secrets Configuration:")
+        logger.info(f"   HF_TOKEN=<hidden>")
         logger.info(f"   HF_MODEL_ID={self.model_id}")
         if self.subfolder and self.subfolder.strip():
             logger.info(f"   MODEL_SUBFOLDER={self.subfolder}")
         logger.info("2. Navigate to the 'Repository secrets' section")
         logger.info("3. Add the following secrets:")
         logger.info(f"   Name: HF_TOKEN")
+        logger.info(f"   Value: <your token>")
         logger.info(f"   Name: HF_MODEL_ID")
         logger.info(f"   Value: {self.model_id}")
         if self.subfolder and self.subfolder.strip():

scripts/trackio_tonic/configure_trackio.py CHANGED Viewed

@@ -99,8 +99,9 @@ def configure_trackio():
     dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', f'{username}/trackio-experiments')
     # Current configuration
     current_config = {
-        'HF_TOKEN': hf_token or 'Not set',
         'TRACKIO_DATASET_REPO': dataset_repo,
         'SPACE_ID': os.environ.get('SPACE_ID', 'Not set'),
         'TRACKIO_URL': os.environ.get('TRACKIO_URL', 'Not set')
@@ -139,8 +140,8 @@ def configure_trackio():
     print(f"📊 Dataset Repository: {dataset_repo}")
     # Test dataset access if token is available
-    test_token = current_config['HF_TOKEN']
-    if test_token != 'Not set':
         print("\n🧪 Testing Dataset Access...")
         try:
             from datasets import load_dataset
@@ -193,8 +194,9 @@ def configure_trackio():
     # Generate configuration file
     config_file = "trackio_config.json"
     config_data = {
-        'hf_token': current_config['HF_TOKEN'],
         'dataset_repo': current_config['TRACKIO_DATASET_REPO'],
         'space_id': current_config['SPACE_ID'],
         'trackio_url': current_config['TRACKIO_URL'],
@@ -211,7 +213,7 @@ def configure_trackio():
     # Show environment variable commands
     print("\n📝 Environment Variables for HF Space:")
     print("=" * 50)
-    print(f"HF_TOKEN={current_config['HF_TOKEN']}")
     print(f"TRACKIO_DATASET_REPO={current_config['TRACKIO_DATASET_REPO']}")
     if current_config['TRACKIO_URL'] != 'Not set':
         print(f"TRACKIO_URL={current_config['TRACKIO_URL']}")

     dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', f'{username}/trackio-experiments')
     # Current configuration
+    # Never expose raw tokens in logs; only track presence
     current_config = {
+        'HF_TOKEN': 'Set' if hf_token else 'Not set',
         'TRACKIO_DATASET_REPO': dataset_repo,
         'SPACE_ID': os.environ.get('SPACE_ID', 'Not set'),
         'TRACKIO_URL': os.environ.get('TRACKIO_URL', 'Not set')
     print(f"📊 Dataset Repository: {dataset_repo}")
     # Test dataset access if token is available
+    test_token = hf_token
+    if test_token:
         print("\n🧪 Testing Dataset Access...")
         try:
             from datasets import load_dataset
     # Generate configuration file
     config_file = "trackio_config.json"
+    # Do not persist raw tokens to disk; store only presence flag
     config_data = {
+        'hf_token_set': bool(hf_token),
         'dataset_repo': current_config['TRACKIO_DATASET_REPO'],
         'space_id': current_config['SPACE_ID'],
         'trackio_url': current_config['TRACKIO_URL'],
     # Show environment variable commands
     print("\n📝 Environment Variables for HF Space:")
     print("=" * 50)
+    print(f"HF_TOKEN={'Set' if hf_token else 'Not set'}")
     print(f"TRACKIO_DATASET_REPO={current_config['TRACKIO_DATASET_REPO']}")
     if current_config['TRACKIO_URL'] != 'Not set':
         print(f"TRACKIO_URL={current_config['TRACKIO_URL']}")

scripts/trackio_tonic/deploy_trackio_space.py CHANGED Viewed

@@ -381,10 +381,9 @@ class TrackioSpaceDeployer:
         """Fallback method for manual secret setup"""
         print("📝 Manual Space Secrets Configuration:")
-        # Use the provided token as HF_TOKEN
         hf_token = self.token
-        print(f"   HF_TOKEN={hf_token}")
         dataset_repo = self.dataset_repo or f"{self.username}/trackio-experiments"
         print(f"   TRACKIO_DATASET_REPO={dataset_repo}")
@@ -395,7 +394,7 @@ class TrackioSpaceDeployer:
         print("2. Navigate to the 'Repository secrets' section")
         print("3. Add the following secrets:")
         print(f"   Name: HF_TOKEN")
-        print(f"   Value: {hf_token}")
         print(f"   Name: TRACKIO_DATASET_REPO")
         print(f"   Value: {dataset_repo}")
         print(f"   Name: TRACKIO_URL")
@@ -485,7 +484,7 @@ def main():
         print(f"Using provided arguments:")
         print(f"  Space name: {space_name}")
-        print(f"  Token: {'*' * 10}...{token[-4:]}")
         print(f"  Git email: {git_email or 'default'}")
         print(f"  Git name: {git_name or 'default'}")
         print(f"  Dataset repo: {dataset_repo or 'default'}")

         """Fallback method for manual secret setup"""
         print("📝 Manual Space Secrets Configuration:")
+        # Use the provided token as HF_TOKEN, but never display it
         hf_token = self.token
+        print(f"   HF_TOKEN={'*' * 10}...hidden")
         dataset_repo = self.dataset_repo or f"{self.username}/trackio-experiments"
         print(f"   TRACKIO_DATASET_REPO={dataset_repo}")
         print("2. Navigate to the 'Repository secrets' section")
         print("3. Add the following secrets:")
         print(f"   Name: HF_TOKEN")
+        print(f"   Value: <your token>")
         print(f"   Name: TRACKIO_DATASET_REPO")
         print(f"   Value: {dataset_repo}")
         print(f"   Name: TRACKIO_URL")
         print(f"Using provided arguments:")
         print(f"  Space name: {space_name}")
+        print(f"  Token: <hidden>")
         print(f"  Git email: {git_email or 'default'}")
         print(f"  Git name: {git_name or 'default'}")
         print(f"  Dataset repo: {dataset_repo or 'default'}")

scripts/training/train_gpt_oss.py CHANGED Viewed

@@ -405,7 +405,6 @@ def create_sft_config(config, output_dir):
         gradient_accumulation_steps=gradient_accumulation_steps,
         # Model configuration
-        max_seq_length=config.max_seq_length,
         gradient_checkpointing=getattr(config, 'use_gradient_checkpointing', True),
         # Mixed precision
@@ -506,6 +505,8 @@ def train_gpt_oss(config_path, experiment_name, output_dir, trackio_url, trainer
         args=sft_config,
         train_dataset=dataset,
         processing_class=tokenizer,
     )
     # Start training

         gradient_accumulation_steps=gradient_accumulation_steps,
         # Model configuration
         gradient_checkpointing=getattr(config, 'use_gradient_checkpointing', True),
         # Mixed precision
         args=sft_config,
         train_dataset=dataset,
         processing_class=tokenizer,
+        dataset_text_field="text",
+        max_seq_length=getattr(config, 'max_seq_length', 2048),
     )
     # Start training