{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Experiment command generator\n",
    "\n",
    "Reads a bash experiment template that is divided into sections by `#####` markers and writes two files:\n",
    "\n",
    "- `gpu_utility.sh`: resource settings plus the `update_device_idx` shell function, which waits for CPU / memory headroom and then picks a GPU.\n",
    "- `output.sh`: the template with `update_device_idx` called before every `main` command and a `sleep` after it.\n",
    "\n",
    "Template layout:\n",
    "\n",
    "- Markers must come in pairs; a section is the text between two consecutive markers.\n",
    "- The marker line names the section: `##### setup`, `##### loop` or `##### main`. Any other section is copied through unchanged.\n",
    "- Text before the first marker and from the last marker onwards is not copied to `output.sh`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Paths are resolved against the notebook's working directory.\n",
    "# NOTE(review): assumes the notebook is launched from the folder that holds the\n",
    "# template; point TEMPLATE_PATH elsewhere if that is not the case.\n",
    "TEMPLATE_PATH = Path(\"test.txt\")\n",
    "GPU_UTILITY_PATH = Path(\"gpu_utility.sh\")\n",
    "OUTPUT_SCRIPT_PATH = Path(\"output.sh\")\n",
    "\n",
    "# Text that opens and closes every section of the template.\n",
    "SECTION_MARKER = \"#####\"\n",
    "\n",
    "# Delimiter of the here-document that carries each main command into output.sh.\n",
    "COMMAND_HEREDOC_TAG = \"__EXP_COMMAND_EOF__\"\n",
    "\n",
    "# When True, every generated command is started in the background with `&`.\n",
    "BACKEND_RUN = False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the template"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "contents = TEMPLATE_PATH.read_text()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split the template into typed spans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_into_spans(text: str, marker: str = SECTION_MARKER) -> list[str]:\n",
    "    \"\"\"Return the pieces of ``text`` that lie between consecutive markers.\n",
    "\n",
    "    Every piece starts with its marker. Text before the first marker and the\n",
    "    text from the last marker onwards are left out.\n",
    "    \"\"\"\n",
    "    offsets = [match.start() for match in re.finditer(re.escape(marker), text)]\n",
    "    assert offsets, f\"no {marker!r} marker found in the template\"\n",
    "    assert len(offsets) % 2 == 0, (\n",
    "        f\"markers must come in pairs, found {len(offsets)} occurrences of {marker!r}\"\n",
    "    )\n",
    "    return [text[start:end] for start, end in zip(offsets, offsets[1:])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keyword on the marker line -> span type. Checked in this order.\n",
    "SPAN_TYPE_BY_KEYWORD = {\"setup\": \"setup\", \"loop\": \"loop\", \"main\": \"command\"}\n",
    "\n",
    "\n",
    "def classify_span(span: str) -> str:\n",
    "    \"\"\"Return the span type named on the span's marker line, or ``\"other\"``.\n",
    "\n",
    "    Only the first line is inspected, so a keyword that merely occurs inside\n",
    "    the section body (a path, a flag, a comment) cannot change the type.\n",
    "    \"\"\"\n",
    "    header = span.partition(\"\\n\")[0]\n",
    "    for keyword, span_type in SPAN_TYPE_BY_KEYWORD.items():\n",
    "        if keyword in header:\n",
    "            return span_type\n",
    "    return \"other\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "spans = split_into_spans(contents)\n",
    "spans_with_type = [(span, classify_span(span)) for span in spans]\n",
    "spans_with_type"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## GPU utility script\n",
    "\n",
    "`gpu_env` holds the tunable settings and `update_device_func` the shell function that reads them. Both are written verbatim to `gpu_utility.sh`, which `output.sh` sources."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gpu_env = \"\"\"\n",
    "username_mine=root\n",
    "max_gpu_utilization=90\n",
    "total_aviable=24564\n",
    "max_gpu_memory_gap=5000\n",
    "available_devices=( 0 1 2 3 4 5 6 7 8 9 )\n",
    "current_device_idx=-1\n",
    "sleeptime=30\n",
    "cpu_mean_max=77\n",
    "memory_rate_max=80\n",
    "constrain_total=true\n",
    "constrain_mine=false\n",
    "constrain_rate=2\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "update_device_func = \"\"\"\n",
    "function update_device_idx {\n",
    "    if [ $constrain_total = true ]; then\n",
    "        # check total cpu usage\n",
    "        while true; do\n",
    "            cpu_mean_1=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')\n",
    "            sleep 1\n",
    "            cpu_mean_2=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')\n",
    "            sleep 1\n",
    "            cpu_mean_3=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')\n",
    "            cpu_mean=$(echo \"scale=2; ($cpu_mean_1+$cpu_mean_2+$cpu_mean_3)/3\" | bc)\n",
    "\n",
    "            # if currently cpu usage is less than the threshold, then break\n",
    "            if [ $(echo \"$cpu_mean < $cpu_mean_max\" | bc) -eq 1 ]; then\n",
    "                echo \"total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage\"\n",
    "                break\n",
    "            else\n",
    "                echo \"total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds\"\n",
    "                sleep 10\n",
    "            fi\n",
    "        done;\n",
    "\n",
    "        # check total memory usage\n",
    "        while true; do\n",
    "            # get memory usage of whole system\n",
    "            mem_used_1=$(free -m | awk '/Mem:/ {print $3}')\n",
    "            sleep 1\n",
    "            mem_used_2=$(free -m | awk '/Mem:/ {print $3}')\n",
    "            sleep 1\n",
    "            mem_used_3=$(free -m | awk '/Mem:/ {print $3}')\n",
    "            mem_used=$(echo \"scale=2; ($mem_used_1+$mem_used_2+$mem_used_3)/3\" | bc)\n",
    "\n",
    "            # echo $mem_used\n",
    "            # get rate of memory usage\n",
    "            mem_rate=$(echo \"scale=2; $mem_used/$(free -m | awk '/Mem:/ {print $2}')*100\" | bc)\n",
    "            # echo $mem_rate\n",
    "            if [ $(echo \"$mem_rate < $memory_rate_max\" | bc) -eq 1 ]; then\n",
    "                echo \"total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage\"\n",
    "                break\n",
    "            else\n",
    "                echo \"total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds\"\n",
    "                sleep 10\n",
    "            fi\n",
    "        done;\n",
    "    fi;\n",
    "\n",
    "    # if constrain_mine\n",
    "    if [ $constrain_mine = true ]; then\n",
    "\n",
    "        # check my own cpu and memory usage, it should be less than 1/$constrain_rate of the given cpu_mean_max / memory_rate_max\n",
    "        while true; do\n",
    "            username=$username_mine\n",
    "            cpu_usage_user_sum=$(ps -u $username -o %cpu | awk '{sum+=$1} END {print sum}')\n",
    "            # echo $cpu_usage_user_sum\n",
    "            total_aviable_cpu=$(nproc)\n",
    "            total_aviable_cpu=$(echo \"$total_aviable_cpu*100\" | bc)\n",
    "            # echo $total_aviable_cpu\n",
    "            cpu_usage_user_ratio=$(echo \"scale=2; $cpu_usage_user_sum/$total_aviable_cpu*100\" | bc)\n",
    "            # echo $cpu_usage_user_ratio\n",
    "\n",
    "            memory_usage_user_sum=$(ps -u $username -o rss | awk '{sum+=$1} END {print sum/1024}')\n",
    "            # echo $memory_usage_user_sum\n",
    "            memory_usage_total=$(free -m | awk '/Mem:/ {print $2}')\n",
    "            # echo $memory_usage_total\n",
    "            memory_usage_user_ratio=$(echo \"scale=2; $memory_usage_user_sum/$memory_usage_total*100\" | bc)\n",
    "            # echo $memory_usage_user_ratio\n",
    "\n",
    "            # so my ratio should be less than 1/$constrain_rate of the given threshold\n",
    "            cpu_mean_max_mine=$(echo \"$cpu_mean_max/$constrain_rate\" | bc)\n",
    "            memory_rate_max_mine=$(echo \"$memory_rate_max/$constrain_rate\" | bc)\n",
    "            if [ $(echo \"$cpu_usage_user_ratio < $cpu_mean_max_mine\" | bc) -eq 1 ] && [ $(echo \"$memory_usage_user_ratio < $memory_rate_max_mine\" | bc) -eq 1 ]; then\n",
    "                echo \"my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off\"\n",
    "                break\n",
    "            else\n",
    "                echo \"my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds\"\n",
    "                sleep 10\n",
    "            fi\n",
    "        done;\n",
    "    fi;\n",
    "\n",
    "    # so all the conditions are satisfied, we can update the device idx and run the next experiment\n",
    "    while true; do\n",
    "        current_device_idx=$((current_device_idx+1))\n",
    "        if [ $current_device_idx -ge ${#available_devices[@]} ]; then\n",
    "            # reset \n",
    "            current_device_idx=0\n",
    "        fi\n",
    "        # check whether this device is fully booked using nvidia-smi\n",
    "        # get the gpu current memory usage \n",
    "        useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})\n",
    "        utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})\n",
    "\n",
    "        if [ $useage -ge $((total_aviable-max_gpu_memory_gap)) ] || [ $utilization -ge $max_gpu_utilization ]; then\n",
    "            echo \"device ${available_devices[$current_device_idx]} is fully booked, try next one\"\n",
    "            sleep 3\n",
    "            continue\n",
    "        else\n",
    "            break\n",
    "        fi\n",
    "    done\n",
    "    echo \"current device: ${available_devices[$current_device_idx]}\"\n",
    "    device=${available_devices[$current_device_idx]}\n",
    "}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gpu_utility = gpu_env + \"\\n\\n\" + update_device_func\n",
    "with GPU_UTILITY_PATH.open(\"w\") as f:\n",
    "    f.write(gpu_utility)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Add device control and write `output.sh`\n",
    "\n",
    "- Before the `setup` span: change to the script's own directory and source `gpu_utility.sh`.\n",
    "- Each `main` span becomes: `update_device_idx`, the command stored in `$command` and run with `eval`, then `sleep $sleeptime`.\n",
    "- Every other span is copied unchanged.\n",
    "\n",
    "The command is stored through a quoted here-document because bash has no triple-quoted strings: wrapping the template text in `\"\"\"` would end the string at the first `\"` inside the command."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "update_device_command = \"update_device_idx;\\n\"\n",
    "\n",
    "# Run from the generated script's own directory so the utility file is found.\n",
    "source_gpu_utility = f'cd $(cd \"$(dirname \"$0\")\";pwd); source {GPU_UTILITY_PATH.name}\\n\\n'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def wrap_command_span(span: str, backend_run: bool = False) -> list[tuple[str, str]]:\n",
    "    \"\"\"Expand a ``main`` span into device selection, the command, and a pause.\n",
    "\n",
    "    The marker line is dropped. The remaining text is assigned to the shell\n",
    "    variable ``command`` through a quoted here-document, so quotes, ``$`` and\n",
    "    backslashes reach ``eval`` exactly as written in the template.\n",
    "\n",
    "    Returns ``(text, type)`` pairs in the order they appear in the script.\n",
    "    \"\"\"\n",
    "    _, _, body = span.partition(\"\\n\")\n",
    "    body = body.rstrip()\n",
    "    assert COMMAND_HEREDOC_TAG not in body, (\n",
    "        f\"template command must not contain {COMMAND_HEREDOC_TAG!r}\"\n",
    "    )\n",
    "    # `read -d ''` hits end-of-input and returns non-zero; `|| true` keeps a\n",
    "    # script running under `set -e` alive.\n",
    "    assignment = (\n",
    "        f\"\\n\\nIFS= read -r -d '' command <<'{COMMAND_HEREDOC_TAG}' || true\\n\"\n",
    "        f\"{body}\\n\"\n",
    "        f\"{COMMAND_HEREDOC_TAG}\\n\"\n",
    "    )\n",
    "    # Quoted so that a multi-line command keeps its line breaks.\n",
    "    run_command = 'eval \"$command\"'\n",
    "    if backend_run:\n",
    "        run_command += \" &\"\n",
    "    run_command += \"\\n\\n\\n\"\n",
    "    return [\n",
    "        (\"\\n\" + update_device_command, \"device_control\"),\n",
    "        (assignment, \"command\"),\n",
    "        (run_command, \"command\"),\n",
    "        (\"sleep $sleeptime\\n\\n\", \"command\"),\n",
    "    ]\n",
    "\n",
    "\n",
    "def add_device_control(\n",
    "    typed_spans: list[tuple[str, str]], backend_run: bool = False\n",
    ") -> list[tuple[str, str]]:\n",
    "    \"\"\"Return ``typed_spans`` with the device-control pieces inserted.\"\"\"\n",
    "    script_parts: list[tuple[str, str]] = []\n",
    "    for span, type_ in typed_spans:\n",
    "        if type_ == \"command\":\n",
    "            script_parts.extend(wrap_command_span(span, backend_run))\n",
    "            continue\n",
    "        if type_ == \"setup\":\n",
    "            script_parts.append((source_gpu_utility, \"device_control\"))\n",
    "        script_parts.append((span, type_))\n",
    "    return script_parts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "spans_with_type_added_device_control = add_device_control(spans_with_type, BACKEND_RUN)\n",
    "output_script = \"\".join(span for span, _ in spans_with_type_added_device_control)\n",
    "with OUTPUT_SCRIPT_PATH.open(\"w\") as f:\n",
    "    f.write(output_script)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}