#!/bin/bash
# Listing metadata from the file viewer: file size 3,004 bytes, revision 42303c8.
#SBATCH -p g48
#SBATCH --job-name=myjob_shareGPT
#SBATCH --qos=high
#SBATCH --nodes=1 # Number of nodes
#SBATCH --ntasks=1 # Number of tasks (one for each script)
#SBATCH --cpus-per-task=60
#SBATCH --gres=gpu:6
#SBATCH --array=1-2 # Array range
# #SBATCH --output=./slurm_outputs/run_clm_job_%A_task_%a.out # Standard output
#SBATCH --output=/dev/null # Discard standard output # Because we write to the log.txt file
# Timestamp of script start, formatted as day-month_hour-minute.
current_time="$(date '+%d-%m_%H-%M')"
# Walk the positional parameters while any remain, echoing the current one
# before dispatching on it with `case`.
# NOTE(review): the case arms, `esac`, any `shift`, and the closing `done` are
# not visible in this listing — restore them from the original script before
# running it.
while test $# -gt 0; do
echo $1
case "$1" in
#######################################
# Ensure a directory exists, reporting whether it had to be created.
# Arguments: $1 - path of the directory
# Outputs:   "Directory '<path>' already exists." or
#            "Directory '<path>' created." on stdout
# Returns:   0 on success; mkdir's non-zero status if creation fails
#######################################
mkdir_is_exists() {
  if [[ -d "$1" ]]; then
    echo "Directory '$1' already exists."
  else
    # Only report success when the directory was actually created.
    mkdir -p -- "$1" || return
    echo "Directory '$1' created."
  fi
}
# Snapshot what is needed to reproduce this run under $OUTPUT_DIR: the current
# git commit, the installed Python packages, the invoking command line, and a
# copy of the code/config into $OUTPUT_DIR/experiment_code.
# Abort early if OUTPUT_DIR is unset or empty; otherwise the redirections
# below would target paths such as /commit.txt.
: "${OUTPUT_DIR:?OUTPUT_DIR must be set before snapshotting the run}"
mkdir_is_exists "$OUTPUT_DIR"
mkdir_is_exists "$OUTPUT_DIR/experiment_code"
git log -n 1 > "$OUTPUT_DIR/commit.txt"
pip freeze > "$OUTPUT_DIR/pip_freeze.txt"
# Record script name, arguments and start time on one line; ARGS is omitted
# cleanly when it is empty or unset.
printf '%s\n' "$0 ${ARGS:+$ARGS }$current_time" > "$OUTPUT_DIR/cmd.txt"
# NOTE(review): the three identical `cp -r ./` commands below look like they
# originally named distinct source files that are not visible in this listing;
# confirm the intended paths. As written, each copies the whole working tree.
cp -r ./ "$OUTPUT_DIR/experiment_code"
cp -r ./ "$OUTPUT_DIR/experiment_code"
cp -r config "$OUTPUT_DIR/experiment_code"
cp -r ./ "$OUTPUT_DIR/experiment_code"
cp -r ./requirements.txt "$OUTPUT_DIR/experiment_code"
# Define the Python scripts and their corresponding input files.
# Keys are written as job-array task ids, values as config file paths.
# NOTE(review): the header requests --array=1-2, but no active entries for
# tasks 1 and 2 are visible in this listing — confirm against the original.
declare -A scripts_and_inputs=(
  # ["3"]="./config/config1.yaml"
  # ["4"]="./config/config1.yaml"
  # ["5"]="./config/config1.yaml"
  # ["6"]="./config/config1.yaml"
  # ["7"]="./config/config1.yaml"
  # ["8"]="./config/config1.yaml"
  # ["9"]="./config/config1.yaml"
  # ["10"]="./config/config1.yaml"
  # ["11"]="./config/config1.yaml"
  # ["12"]="./config/config1.yaml"
  # ["13"]="./config/config1.yaml"
  # ["14"]="./config/config1.yaml"
  # ["15"]="./config/config1.yaml"
  # ["16"]="./config/config1.yaml"
  # ["17"]="./config/config1.yaml"
  # ["18"]="./config/config1.yaml"
  # ["19"]="./config/config1.yaml"
  # ["20"]="./config/config1.yaml"
)
# Launch each script with its corresponding input file as a separate task
echo "Starting job array task: $SLURM_ARRAY_TASK_ID"
# Exported so that processes started below can read it from their environment.
export DEFAULT_CONFIG_FILE="./config/config1.yaml"
# Run the Python step through srun; stderr is merged into stdout and the
# combined stream is both passed through and written (overwriting) to
# $OUTPUT_DIR/log.txt by tee.
# NOTE(review): no script path follows `python` in this listing, and INPUT_DIR
# is not assigned anywhere in the visible portion — confirm both against the
# original script.
# NOTE(review): without `set -o pipefail` (not visible here) the pipeline's
# exit status is tee's, not srun's.
srun --exclusive python --output_dir $OUTPUT_DIR --logging_dir $OUTPUT_DIR --config_file $INPUT_DIR 2>&1 | tee $OUTPUT_DIR/log.txt
# Wait for all background jobs to complete
# NOTE(review): no `wait` command is visible after the comment above.
# Print a message indicating completion
echo "All Python scripts have been executed."
# mv ./slurm_outputs/run_clm_job_$SLURM_ARRAY_JOB_ID*$SLURM_ARRAY_TASK_ID* "$output_dir/"
# python -m torch.distributed.launch ~/target_draft_coupling_code/target_draft_training/ --multirun task=1,2