#!/bin/bash
#
# Runs perplexities_qwen.py once for every combination of
# (test perturbation, checkpoint, random seed), spreading the runs over
# the GPUs listed in `gpus`.
#
# Scheduling model: on each pass, every GPU that currently reports less
# than 10% utilization gets one job; the script then waits for all jobs
# of that pass to finish before starting the next pass.
#
# Requires: nvidia-smi and python in PATH, perplexities_qwen.py in the
# current working directory.

# Parameter lists.
test_perturbations=("reverse_full")
# NOTE(review): the list jumps from checkpoint-10000 to checkpoint-11500;
# confirm that skipping 10500 and 11000 is intended.
checkpoints=(
  "checkpoint-500" "checkpoint-1000" "checkpoint-1500" "checkpoint-2000"
  "checkpoint-2500" "checkpoint-3000" "checkpoint-3500" "checkpoint-4000"
  "checkpoint-4500" "checkpoint-5000" "checkpoint-5500" "checkpoint-6000"
  "checkpoint-6500" "checkpoint-7000" "checkpoint-7500" "checkpoint-8000"
  "checkpoint-8500" "checkpoint-9000" "checkpoint-9500" "checkpoint-10000"
  "checkpoint-11500"
)
random_seeds=(1 2 3 4 5)
gpus=(1 2 3 4 5 6 7) # GPU ids this script is allowed to use

# Index of the next task to launch, and the total number of tasks.
task_index=0
total_combinations=$((${#test_perturbations[@]} * ${#checkpoints[@]} * ${#random_seeds[@]}))

#######################################
# Check whether a GPU is idle (utilization below 10%).
# Arguments: $1 - GPU id as understood by `nvidia-smi -i`
# Returns:   0 if the GPU reports < 10% utilization;
#            1 if it is busy, or if the utilization could not be read
#            (nvidia-smi failed or printed something non-numeric).
#######################################
is_gpu_free() {
  local gpu_id=$1
  local utilization

  # Declaration is kept separate from the assignment so that a failing
  # nvidia-smi is not masked by `local`'s own exit status.
  utilization=$(nvidia-smi -i "$gpu_id" --query-gpu=utilization.gpu \
    --format=csv,noheader,nounits) || return 1

  # Drop stray whitespace, then make sure we really have an integer;
  # an empty or "[N/A]"-style reading is treated as "not free".
  utilization=${utilization//[[:space:]]/}
  [[ "$utilization" =~ ^[0-9]+$ ]] || return 1

  # Force base 10 so a reading such as "08" is not parsed as octal.
  ((10#$utilization < 10))
}

#######################################
# Decode `task_index` into one parameter combination.
# The perturbation index varies fastest, then the checkpoint, then the seed.
# Globals: task_index, test_perturbations, checkpoints, random_seeds (read)
#          perturbation, checkpoint, seed (written)
#######################################
get_next_task() {
  local n_perturbations=${#test_perturbations[@]}
  local n_checkpoints=${#checkpoints[@]}
  local n_seeds=${#random_seeds[@]}

  perturbation=${test_perturbations[task_index % n_perturbations]}
  checkpoint=${checkpoints[(task_index / n_perturbations) % n_checkpoints]}
  seed=${random_seeds[(task_index / (n_perturbations * n_checkpoints)) % n_seeds]}
}

#######################################
# Main scheduling loop.
# Returns: 0 if every job exited successfully;
#          1 if nvidia-smi is unavailable or at least one job failed.
#######################################
main() {
  # Without nvidia-smi every GPU would look busy forever, so bail out
  # instead of polling endlessly.
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    printf 'error: nvidia-smi not found in PATH; cannot query GPU state\n' >&2
    return 1
  fi

  local gpu pid
  local failed=0
  local -a pids

  while ((task_index < total_combinations)); do
    pids=()

    for gpu in "${gpus[@]}"; do
      if is_gpu_free "$gpu"; then
        get_next_task
        echo "Running experiment for $perturbation, $checkpoint, seed $seed on GPU $gpu"
        CUDA_VISIBLE_DEVICES=$gpu python perplexities_qwen.py "$perturbation" "$checkpoint" "$seed" &
        pids+=("$!")

        # Advance to the next task; stop handing out work once all are launched.
        task_index=$((task_index + 1))
        if ((task_index >= total_combinations)); then
          break
        fi
      fi
    done

    # Barrier: wait for every job of this pass (at most one per GPU).
    # Waiting per PID lets us notice jobs that exited with an error.
    for pid in "${pids[@]}"; do
      if ! wait "$pid"; then
        printf 'warning: job with PID %s exited with a non-zero status\n' "$pid" >&2
        failed=$((failed + 1))
      fi
    done

    # Pause briefly before polling the GPUs again; pointless after the last batch.
    if ((task_index < total_combinations)); then
      sleep 5
    fi
  done

  if ((failed > 0)); then
    printf 'error: %d job(s) failed\n' "$failed" >&2
    return 1
  fi
  return 0
}

main "$@"