Spaces:
Running
Running
Adds Trackio support for the large experiment
Browse files
- run_a100_large_experiment.py +20 -0
run_a100_large_experiment.py
CHANGED
@@ -38,6 +38,16 @@ def main():
 38   |         action="store_true",
 39   |         help="Print configuration without starting training"
 40   |     )
 41   |
 42   |     args = parser.parse_args()
 43   |
@@ -72,6 +82,12 @@ def main():
 72   |     if args.experiment_name:
 73   |         config.experiment_name = args.experiment_name
 74   |
 75   |     # Create output directory
 76   |     os.makedirs(args.output_dir, exist_ok=True)
 77   |
@@ -91,6 +107,10 @@ def main():
 91   |     print(f"Max sequence length: {config.max_seq_length}")
 92   |     print(f"Mixed precision: {'bf16' if config.bf16 else 'fp16'}")
 93   |     print(f"Dataset: {config.dataset_name}")
 94   |     print(f"{'='*60}\n")
 95   |
 96   |     if args.dry_run:
 38   |         action="store_true",
 39   |         help="Print configuration without starting training"
 40   |     )
 41 + |     parser.add_argument(
 42 + |         "--trackio-url",
 43 + |         type=str,
 44 + |         help="Trackio URL for experiment tracking"
 45 + |     )
 46 + |     parser.add_argument(
 47 + |         "--trackio-token",
 48 + |         type=str,
 49 + |         help="Trackio token for authentication"
 50 + |     )
 51   |
 52   |     args = parser.parse_args()
 53   |
 82   |     if args.experiment_name:
 83   |         config.experiment_name = args.experiment_name
 84   |
 85 + |     # Override Trackio settings if provided
 86 + |     if args.trackio_url:
 87 + |         config.trackio_url = args.trackio_url
 88 + |     if args.trackio_token:
 89 + |         config.trackio_token = args.trackio_token
 90 + |
 91   |     # Create output directory
 92   |     os.makedirs(args.output_dir, exist_ok=True)
 93   |
107   |     print(f"Max sequence length: {config.max_seq_length}")
108   |     print(f"Mixed precision: {'bf16' if config.bf16 else 'fp16'}")
109   |     print(f"Dataset: {config.dataset_name}")
110 + |     if config.trackio_url:
111 + |         print(f"Trackio URL: {config.trackio_url}")
112 + |     if config.trackio_token:
113 + |         print(f"Trackio Token: {'*' * len(config.trackio_token)}")
114   |     print(f"{'='*60}\n")
115   |
116   |     if args.dry_run: