Spaces:
Running
Running
Hide all tokens in logs, never persist tokens to disk, and move max_seq_length from the SFT config to the trainer call
Browse files
launch.sh
CHANGED
@@ -996,7 +996,6 @@ echo "================================"
|
|
996 |
print_info "Setting up Hugging Face token for Python API..."
|
997 |
print_status "HF token configured for Python API usage"
|
998 |
print_info "Username: $HF_USERNAME (auto-detected from token)"
|
999 |
-
print_info "Token available in environment: ${HF_TOKEN:0:10}...${HF_TOKEN: -4}"
|
1000 |
|
1001 |
# Verify token is available in the virtual environment
|
1002 |
print_info "Verifying token availability in virtual environment..."
|
|
|
996 |
print_info "Setting up Hugging Face token for Python API..."
|
997 |
print_status "HF token configured for Python API usage"
|
998 |
print_info "Username: $HF_USERNAME (auto-detected from token)"
|
|
|
999 |
|
1000 |
# Verify token is available in the virtual environment
|
1001 |
print_info "Verifying token availability in virtual environment..."
|
scripts/deploy_demo_space.py
CHANGED
@@ -451,7 +451,7 @@ Simply start chatting with the model using the interface below!
|
|
451 |
def _manual_secret_setup(self) -> bool:
|
452 |
"""Fallback method for manual secret setup"""
|
453 |
logger.info("π Manual Space Secrets Configuration:")
|
454 |
-
logger.info(f" HF_TOKEN
|
455 |
logger.info(f" HF_MODEL_ID={self.model_id}")
|
456 |
if self.subfolder and self.subfolder.strip():
|
457 |
logger.info(f" MODEL_SUBFOLDER={self.subfolder}")
|
@@ -470,7 +470,7 @@ Simply start chatting with the model using the interface below!
|
|
470 |
logger.info("2. Navigate to the 'Repository secrets' section")
|
471 |
logger.info("3. Add the following secrets:")
|
472 |
logger.info(f" Name: HF_TOKEN")
|
473 |
-
logger.info(f" Value:
|
474 |
logger.info(f" Name: HF_MODEL_ID")
|
475 |
logger.info(f" Value: {self.model_id}")
|
476 |
if self.subfolder and self.subfolder.strip():
|
|
|
451 |
def _manual_secret_setup(self) -> bool:
|
452 |
"""Fallback method for manual secret setup"""
|
453 |
logger.info("π Manual Space Secrets Configuration:")
|
454 |
+
logger.info(f" HF_TOKEN=<hidden>")
|
455 |
logger.info(f" HF_MODEL_ID={self.model_id}")
|
456 |
if self.subfolder and self.subfolder.strip():
|
457 |
logger.info(f" MODEL_SUBFOLDER={self.subfolder}")
|
|
|
470 |
logger.info("2. Navigate to the 'Repository secrets' section")
|
471 |
logger.info("3. Add the following secrets:")
|
472 |
logger.info(f" Name: HF_TOKEN")
|
473 |
+
logger.info(f" Value: <your token>")
|
474 |
logger.info(f" Name: HF_MODEL_ID")
|
475 |
logger.info(f" Value: {self.model_id}")
|
476 |
if self.subfolder and self.subfolder.strip():
|
scripts/trackio_tonic/configure_trackio.py
CHANGED
@@ -99,8 +99,9 @@ def configure_trackio():
|
|
99 |
dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', f'{username}/trackio-experiments')
|
100 |
|
101 |
# Current configuration
|
|
|
102 |
current_config = {
|
103 |
-
'HF_TOKEN': hf_token
|
104 |
'TRACKIO_DATASET_REPO': dataset_repo,
|
105 |
'SPACE_ID': os.environ.get('SPACE_ID', 'Not set'),
|
106 |
'TRACKIO_URL': os.environ.get('TRACKIO_URL', 'Not set')
|
@@ -139,8 +140,8 @@ def configure_trackio():
|
|
139 |
print(f"π Dataset Repository: {dataset_repo}")
|
140 |
|
141 |
# Test dataset access if token is available
|
142 |
-
test_token =
|
143 |
-
if test_token
|
144 |
print("\n🧪 Testing Dataset Access...")
|
145 |
try:
|
146 |
from datasets import load_dataset
|
@@ -193,8 +194,9 @@ def configure_trackio():
|
|
193 |
|
194 |
# Generate configuration file
|
195 |
config_file = "trackio_config.json"
|
|
|
196 |
config_data = {
|
197 |
-
'
|
198 |
'dataset_repo': current_config['TRACKIO_DATASET_REPO'],
|
199 |
'space_id': current_config['SPACE_ID'],
|
200 |
'trackio_url': current_config['TRACKIO_URL'],
|
@@ -211,7 +213,7 @@ def configure_trackio():
|
|
211 |
# Show environment variable commands
|
212 |
print("\nπ Environment Variables for HF Space:")
|
213 |
print("=" * 50)
|
214 |
-
print(f"HF_TOKEN={
|
215 |
print(f"TRACKIO_DATASET_REPO={current_config['TRACKIO_DATASET_REPO']}")
|
216 |
if current_config['TRACKIO_URL'] != 'Not set':
|
217 |
print(f"TRACKIO_URL={current_config['TRACKIO_URL']}")
|
|
|
99 |
dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', f'{username}/trackio-experiments')
|
100 |
|
101 |
# Current configuration
|
102 |
+
# Never expose raw tokens in logs; only track presence
|
103 |
current_config = {
|
104 |
+
'HF_TOKEN': 'Set' if hf_token else 'Not set',
|
105 |
'TRACKIO_DATASET_REPO': dataset_repo,
|
106 |
'SPACE_ID': os.environ.get('SPACE_ID', 'Not set'),
|
107 |
'TRACKIO_URL': os.environ.get('TRACKIO_URL', 'Not set')
|
|
|
140 |
print(f"π Dataset Repository: {dataset_repo}")
|
141 |
|
142 |
# Test dataset access if token is available
|
143 |
+
test_token = hf_token
|
144 |
+
if test_token:
|
145 |
print("\n🧪 Testing Dataset Access...")
|
146 |
try:
|
147 |
from datasets import load_dataset
|
|
|
194 |
|
195 |
# Generate configuration file
|
196 |
config_file = "trackio_config.json"
|
197 |
+
# Do not persist raw tokens to disk; store only presence flag
|
198 |
config_data = {
|
199 |
+
'hf_token_set': bool(hf_token),
|
200 |
'dataset_repo': current_config['TRACKIO_DATASET_REPO'],
|
201 |
'space_id': current_config['SPACE_ID'],
|
202 |
'trackio_url': current_config['TRACKIO_URL'],
|
|
|
213 |
# Show environment variable commands
|
214 |
print("\nπ Environment Variables for HF Space:")
|
215 |
print("=" * 50)
|
216 |
+
print(f"HF_TOKEN={'Set' if hf_token else 'Not set'}")
|
217 |
print(f"TRACKIO_DATASET_REPO={current_config['TRACKIO_DATASET_REPO']}")
|
218 |
if current_config['TRACKIO_URL'] != 'Not set':
|
219 |
print(f"TRACKIO_URL={current_config['TRACKIO_URL']}")
|
scripts/trackio_tonic/deploy_trackio_space.py
CHANGED
@@ -381,10 +381,9 @@ class TrackioSpaceDeployer:
|
|
381 |
"""Fallback method for manual secret setup"""
|
382 |
print("π Manual Space Secrets Configuration:")
|
383 |
|
384 |
-
# Use the provided token as HF_TOKEN
|
385 |
hf_token = self.token
|
386 |
-
|
387 |
-
print(f" HF_TOKEN={hf_token}")
|
388 |
|
389 |
dataset_repo = self.dataset_repo or f"{self.username}/trackio-experiments"
|
390 |
print(f" TRACKIO_DATASET_REPO={dataset_repo}")
|
@@ -395,7 +394,7 @@ class TrackioSpaceDeployer:
|
|
395 |
print("2. Navigate to the 'Repository secrets' section")
|
396 |
print("3. Add the following secrets:")
|
397 |
print(f" Name: HF_TOKEN")
|
398 |
-
print(f" Value:
|
399 |
print(f" Name: TRACKIO_DATASET_REPO")
|
400 |
print(f" Value: {dataset_repo}")
|
401 |
print(f" Name: TRACKIO_URL")
|
@@ -485,7 +484,7 @@ def main():
|
|
485 |
|
486 |
print(f"Using provided arguments:")
|
487 |
print(f" Space name: {space_name}")
|
488 |
-
print(f" Token:
|
489 |
print(f" Git email: {git_email or 'default'}")
|
490 |
print(f" Git name: {git_name or 'default'}")
|
491 |
print(f" Dataset repo: {dataset_repo or 'default'}")
|
|
|
381 |
"""Fallback method for manual secret setup"""
|
382 |
print("π Manual Space Secrets Configuration:")
|
383 |
|
384 |
+
# Use the provided token as HF_TOKEN, but never display it
|
385 |
hf_token = self.token
|
386 |
+
print(f" HF_TOKEN={'*' * 10}...hidden")
|
|
|
387 |
|
388 |
dataset_repo = self.dataset_repo or f"{self.username}/trackio-experiments"
|
389 |
print(f" TRACKIO_DATASET_REPO={dataset_repo}")
|
|
|
394 |
print("2. Navigate to the 'Repository secrets' section")
|
395 |
print("3. Add the following secrets:")
|
396 |
print(f" Name: HF_TOKEN")
|
397 |
+
print(f" Value: <your token>")
|
398 |
print(f" Name: TRACKIO_DATASET_REPO")
|
399 |
print(f" Value: {dataset_repo}")
|
400 |
print(f" Name: TRACKIO_URL")
|
|
|
484 |
|
485 |
print(f"Using provided arguments:")
|
486 |
print(f" Space name: {space_name}")
|
487 |
+
print(f" Token: <hidden>")
|
488 |
print(f" Git email: {git_email or 'default'}")
|
489 |
print(f" Git name: {git_name or 'default'}")
|
490 |
print(f" Dataset repo: {dataset_repo or 'default'}")
|
scripts/training/train_gpt_oss.py
CHANGED
@@ -405,7 +405,6 @@ def create_sft_config(config, output_dir):
|
|
405 |
gradient_accumulation_steps=gradient_accumulation_steps,
|
406 |
|
407 |
# Model configuration
|
408 |
-
max_seq_length=config.max_seq_length,
|
409 |
gradient_checkpointing=getattr(config, 'use_gradient_checkpointing', True),
|
410 |
|
411 |
# Mixed precision
|
@@ -506,6 +505,8 @@ def train_gpt_oss(config_path, experiment_name, output_dir, trackio_url, trainer
|
|
506 |
args=sft_config,
|
507 |
train_dataset=dataset,
|
508 |
processing_class=tokenizer,
|
|
|
|
|
509 |
)
|
510 |
|
511 |
# Start training
|
|
|
405 |
gradient_accumulation_steps=gradient_accumulation_steps,
|
406 |
|
407 |
# Model configuration
|
|
|
408 |
gradient_checkpointing=getattr(config, 'use_gradient_checkpointing', True),
|
409 |
|
410 |
# Mixed precision
|
|
|
505 |
args=sft_config,
|
506 |
train_dataset=dataset,
|
507 |
processing_class=tokenizer,
|
508 |
+
dataset_text_field="text",
|
509 |
+
max_seq_length=getattr(config, 'max_seq_length', 2048),
|
510 |
)
|
511 |
|
512 |
# Start training
|