Impossible_llm / perplexities /run_experiments.sh
Yaning1001's picture
Add files using upload-large-folder tool
94011a1 verified
#!/bin/bash
# Parameter grid for the experiments.
test_perturbations=(
  "reverse_full"
)

# Checkpoints saved every 500 steps from 500 through 10000, plus 11500.
# NOTE(review): the list jumps from 10000 straight to 11500 -- confirm this
# gap is intentional.
checkpoints=()
for ((_ckpt_step = 500; _ckpt_step <= 10000; _ckpt_step += 500)); do
  checkpoints+=("checkpoint-${_ckpt_step}")
done
unset _ckpt_step
checkpoints+=("checkpoint-11500")

random_seeds=(1 2 3 4 5)

# GPU indices the scheduler is allowed to use (index 0 is not in the list).
gpus=(1 2 3 4 5 6 7)

# Index of the next task to launch.
task_index=0

# Size of the full grid: perturbations x checkpoints x seeds.
total_combinations=${#test_perturbations[@]}
total_combinations=$((total_combinations * ${#checkpoints[@]}))
total_combinations=$((total_combinations * ${#random_seeds[@]}))
# Report whether a GPU looks idle, judged by its current utilization.
#
# Arguments:
#   $1 - GPU index handed to `nvidia-smi -i`
#   $2 - optional utilization threshold in percent (default: 10)
# Returns:
#   0 when the reported utilization is below the threshold;
#   1 otherwise, including when nvidia-smi fails or prints something that
#   is not a plain non-negative integer (the GPU is then treated as busy).
is_gpu_free() {
  local gpu_id=$1
  local threshold=${2:-10}
  local utilization

  # Assigned separately from `local` so a failing nvidia-smi is not masked.
  utilization=$(nvidia-smi -i "$gpu_id" --query-gpu=utilization.gpu --format=csv,noheader,nounits) || return 1

  # Accept a single integer with optional surrounding whitespace. Anything
  # else (empty output, "[N/A]", several lines) cannot be compared
  # numerically, so report busy instead of letting `[ -lt ]` print an error.
  if ! [[ "$utilization" =~ ^[[:space:]]*([0-9]+)[[:space:]]*$ ]]; then
    return 1
  fi
  utilization=${BASH_REMATCH[1]}

  # `[ -lt ]` compares in decimal; (( )) would read a leading 0 as octal.
  if [ "$utilization" -lt "$threshold" ]; then
    return 0 # GPU is free
  fi
  return 1 # GPU is busy
}
# Translate the global task_index into one parameter combination.
#
# The perturbation position cycles fastest, the checkpoint position advances
# once per full pass over the perturbations, and the seed position advances
# once per full pass over perturbations x checkpoints.
#
# Globals read:    task_index, test_perturbations, checkpoints, random_seeds
# Globals written: perturbation, checkpoint, seed
get_next_task() {
  local idx=$task_index
  local n_pert=${#test_perturbations[@]}
  local n_ckpt=${#checkpoints[@]}
  local n_seed=${#random_seeds[@]}

  local pert_pos=$((idx % n_pert))
  local ckpt_pos=$(((idx / n_pert) % n_ckpt))
  local seed_pos=$(((idx / (n_pert * n_ckpt)) % n_seed))

  perturbation=${test_perturbations[$pert_pos]}
  checkpoint=${checkpoints[$ckpt_pos]}
  seed=${random_seeds[$seed_pos]}
}
# Main scheduling loop.
#
# Each pass walks the GPU list once and launches at most one experiment on
# every GPU for which is_gpu_free succeeds. The pass then blocks until all
# jobs it started have exited before the next pass begins. Jobs are waited
# on by PID so a non-zero exit status is reported instead of being discarded
# by a bare `wait`.
failed_jobs=0
while ((task_index < total_combinations)); do
  batch_pids=()
  batch_labels=()

  for gpu in "${gpus[@]}"; do
    if is_gpu_free "$gpu"; then
      get_next_task
      echo "Running experiment for $perturbation, $checkpoint, seed $seed on GPU $gpu"
      CUDA_VISIBLE_DEVICES=$gpu python perplexities_qwen.py "$perturbation" "$checkpoint" "$seed" &
      batch_pids+=("$!")
      batch_labels+=("$perturbation, $checkpoint, seed $seed on GPU $gpu")

      # Advance to the next task; stop launching once the grid is exhausted.
      task_index=$((task_index + 1))
      if ((task_index >= total_combinations)); then
        break
      fi
    fi
  done

  # Barrier: wait for every job started in this pass and record failures.
  for job in "${!batch_pids[@]}"; do
    if ! wait "${batch_pids[$job]}"; then
      echo "Experiment failed: ${batch_labels[$job]}" >&2
      failed_jobs=$((failed_jobs + 1))
    fi
  done

  # Pause briefly before polling the GPUs again; pointless after the last batch.
  if ((task_index < total_combinations)); then
    sleep 5
  fi
done

# Surface failures through the exit status so callers can detect them.
if ((failed_jobs > 0)); then
  echo "$failed_jobs experiment(s) exited with a non-zero status" >&2
  exit 1
fi