Spaces:
Sleeping
Sleeping
#!/usr/bin/env python | |
# Copyright 2022 The HuggingFace Team. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import argparse | |
import os | |
import subprocess | |
from packaging.version import Version, parse | |
from accelerate.commands.config.config_args import default_config_file, load_config_from_file | |
_description = "Run commands across TPU VMs for initial setup before running `accelerate launch`." | |
def tpu_command_parser(subparsers=None): | |
if subparsers is not None: | |
parser = subparsers.add_parser("tpu-config", description=_description) | |
else: | |
parser = argparse.ArgumentParser("Accelerate tpu-config command", description=_description) | |
# Core arguments | |
config_args = parser.add_argument_group( | |
"Config Arguments", "Arguments that can be configured through `accelerate config`." | |
) | |
config_args.add_argument( | |
"--config_file", | |
type=str, | |
default=None, | |
help="Path to the config file to use for accelerate.", | |
) | |
config_args.add_argument( | |
"--tpu_name", | |
default=None, | |
help="The name of the TPU to use. If not specified, will use the TPU specified in the config file.", | |
) | |
config_args.add_argument( | |
"--tpu_zone", | |
default=None, | |
help="The zone of the TPU to use. If not specified, will use the zone specified in the config file.", | |
) | |
pod_args = parser.add_argument_group("TPU Arguments", "Arguments for options ran inside the TPU.") | |
pod_args.add_argument( | |
"--use_alpha", | |
action="store_true", | |
help="Whether to use `gcloud alpha` when running the TPU training script instead of `gcloud`.", | |
) | |
pod_args.add_argument( | |
"--command_file", | |
default=None, | |
help="The path to the file containing the commands to run on the pod on startup.", | |
) | |
pod_args.add_argument( | |
"--command", | |
action="append", | |
nargs="+", | |
help="A command to run on the pod. Can be passed multiple times.", | |
) | |
pod_args.add_argument( | |
"--install_accelerate", | |
action="store_true", | |
help="Whether to install accelerate on the pod. Defaults to False.", | |
) | |
pod_args.add_argument( | |
"--accelerate_version", | |
default="latest", | |
help="The version of accelerate to install on the pod. If not specified, will use the latest pypi version. Specify 'dev' to install from GitHub.", | |
) | |
pod_args.add_argument( | |
"--debug", action="store_true", help="If set, will print the command that would be run instead of running it." | |
) | |
if subparsers is not None: | |
parser.set_defaults(func=tpu_command_launcher) | |
return parser | |
def tpu_command_launcher(args): | |
defaults = None | |
# Get the default from the config file if it exists. | |
if args.config_file is not None or os.path.isfile(default_config_file): | |
defaults = load_config_from_file(args.config_file) | |
if not args.command_file and defaults.command_file is not None and not args.command: | |
args.command_file = defaults.command_file | |
if not args.command and defaults.commands is not None: | |
args.command = defaults.commands | |
if not args.tpu_name: | |
args.tpu_name = defaults.tpu_name | |
if not args.tpu_zone: | |
args.tpu_zone = defaults.tpu_zone | |
if args.accelerate_version == "dev": | |
args.accelerate_version = "git+https://github.com/huggingface/accelerate.git" | |
elif args.accelerate_version == "latest": | |
args.accelerate_version = "accelerate -U" | |
elif isinstance(parse(args.accelerate_version), Version): | |
args.accelerate_version = f"accelerate=={args.accelerate_version}" | |
if not args.command_file and not args.command: | |
raise ValueError("You must specify either a command file or a command to run on the pod.") | |
if args.command_file: | |
with open(args.command_file, "r") as f: | |
args.command = [f.read().splitlines()] | |
# To turn list of lists into list of strings | |
if isinstance(args.command[0], list): | |
args.command = [line for cmd in args.command for line in cmd] | |
# Default to the shared folder and install accelerate | |
new_cmd = ["cd /usr/share"] | |
if args.install_accelerate: | |
new_cmd += [f"pip install {args.accelerate_version}"] | |
new_cmd += args.command | |
args.command = "; ".join(new_cmd) | |
# Then send it to gcloud | |
# Eventually try to use google-api-core to do this instead of subprocess | |
cmd = ["gcloud"] | |
if args.use_alpha: | |
cmd += ["alpha"] | |
cmd += [ | |
"compute", | |
"tpus", | |
"tpu-vm", | |
"ssh", | |
args.tpu_name, | |
"--zone", | |
args.tpu_zone, | |
"--command", | |
args.command, | |
"--worker", | |
"all", | |
] | |
if args.debug: | |
print(f"Running {' '.join(cmd)}") | |
return | |
subprocess.run(cmd) | |
print("Successfully setup pod.") | |
def main(): | |
parser = tpu_command_parser() | |
args = parser.parse_args() | |
tpu_command_launcher(args) | |