Spaces:
Runtime error
Runtime error
Yixin Liu
committed on
Commit
·
bb690a1
1
Parent(s):
f1afb35
upload
Browse files- __pycache__/config.cpython-39.pyc +0 -0
- __pycache__/parse_code.cpython-39.pyc +0 -0
- config.py +107 -0
- gpu_utility.sh +7 -7
- main.py +139 -122
- parse_code.py +61 -0
- res/20230615-17h44m58s/gpu_utility.sh +118 -0
- res/20230615-17h44m58s/script.sh +29 -0
- res/20230615-17h45m38s/gpu_utility.sh +118 -0
- res/20230615-17h45m38s/script.sh +29 -0
- res/20230615-17h46m42s/gpu_utility.sh +118 -0
- res/20230615-17h46m42s/script.sh +29 -0
- res/20230615-17h48m58s.zip +0 -0
- res/20230615-17h48m58s/gpu_utility.sh +118 -0
- res/20230615-17h48m58s/script.sh +29 -0
- res/20230615-17h49m08s.zip +0 -0
- res/20230615-17h49m08s/gpu_utility.sh +118 -0
- res/20230615-17h49m08s/script.sh +29 -0
- res/20230615-17h49m45s/gpu_utility.sh +118 -0
- res/20230615-17h49m45s/script.sh +29 -0
- res/20230615-17h50m13s/gpu_utility.sh +118 -0
- res/20230615-17h50m13s/script.sh +29 -0
- res/20230615-17h50m22s/gpu_utility.sh +118 -0
- res/20230615-17h50m22s/script.sh +29 -0
- res/20230615-17h50m57s/gpu_utility.sh +118 -0
- res/20230615-17h50m57s/script.sh +29 -0
- res/20230615-17h51m33s/gpu_utility.sh +118 -0
- res/20230615-17h51m33s/script.sh +29 -0
- res/20230615-17h51m43s/gpu_utility.sh +118 -0
- res/20230615-17h51m43s/script.sh +29 -0
- res/20230615-17h53m28s/gpu_utility.sh +118 -0
- res/20230615-17h53m28s/script.sh +25 -0
- res/20230615-17h53m44s/gpu_utility.sh +118 -0
- res/20230615-17h53m44s/script.sh +24 -0
- res/20230615-17h55m17s/gpu_utility.sh +118 -0
- res/20230615-17h55m17s/script.sh +23 -0
- res/20230615-17h56m22s/gpu_utility.sh +118 -0
- res/20230615-17h56m22s/script.sh +23 -0
__pycache__/config.cpython-39.pyc
ADDED
Binary file (5.37 kB). View file
|
|
__pycache__/parse_code.cpython-39.pyc
ADDED
Binary file (1.61 kB). View file
|
|
config.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
update_device_command = "update_device_idx;\n"
|
3 |
+
|
4 |
+
update_device_func = """
|
5 |
+
# Block until the host has spare capacity, then select the next usable GPU.
#
# Globals read:
#   constrain_total     - "true" enables the system-wide CPU / memory gate
#   cpu_mean_max        - system-wide CPU usage threshold (percent)
#   memory_rate_max     - system-wide memory usage threshold (percent)
#   constrain_mine      - "true" enables the per-user CPU / memory gate
#   constrain_rate      - divisor applied to both thresholds for the per-user gate
#   username_mine       - account checked by the per-user gate (falls back to
#                         username, then to the current user)
#   available_devices   - array of GPU ids to rotate through
#   total_gpu_memory    - per-GPU memory total (falls back to total_aviable)
#   max_gpu_memory_gap  - memory that must stay free on a GPU
#   max_gpu_utilization - a GPU at or above this utilization is skipped
# Globals written:
#   current_device_idx  - index into available_devices of the selected GPU
#   device              - id of the selected GPU
# Returns: 0 once a GPU is selected, 1 when the configuration is unusable.
# External tools: mpstat, free, ps, nproc, bc, awk, nvidia-smi.
function update_device_idx {
  local limit_total=false limit_mine=false
  local cpu_avg_awk cpu_1 cpu_2 cpu_3 cpu_mean
  local mem_1 mem_2 mem_3 mem_used mem_total mem_rate
  local user cpu_user_sum cpu_count cpu_user_ratio
  local mem_user_mb mem_user_ratio cpu_max_mine mem_max_mine
  local total_mem mem_limit gpu usage utilization

  # The settings file may carry Python-style booleans (True/False), so accept
  # the common spellings instead of only the literal "true".
  case "$constrain_total" in true|True|TRUE|1) limit_total=true ;; esac
  case "$constrain_mine" in true|True|TRUE|1) limit_mine=true ;; esac

  # Fail fast on a configuration that could never select a device.
  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "update_device_idx: available_devices is empty" >&2
    return 1
  fi
  total_mem=${total_gpu_memory:-$total_aviable}
  if [ -z "$total_mem" ]; then
    echo "update_device_idx: total_gpu_memory is not set" >&2
    return 1
  fi
  mem_limit=$((total_mem - max_gpu_memory_gap))

  if [ "$limit_total" = true ]; then
    # check total cpu usage: mean of three mpstat samples, each averaged over all CPUs
    cpu_avg_awk='/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }'
    while true; do
      cpu_1=$(mpstat -P ALL 1 1 | awk "$cpu_avg_awk")
      sleep 1
      cpu_2=$(mpstat -P ALL 1 1 | awk "$cpu_avg_awk")
      sleep 1
      cpu_3=$(mpstat -P ALL 1 1 | awk "$cpu_avg_awk")
      cpu_mean=$(echo "scale=2; ($cpu_1+$cpu_2+$cpu_3)/3" | bc)

      # if currently cpu usage is less than the threshold, then break
      if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = 1 ]; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage: mean of three samples of the "used" column
    while true; do
      mem_1=$(free -m | awk '/Mem:/ {print $3}')
      sleep 1
      mem_2=$(free -m | awk '/Mem:/ {print $3}')
      sleep 1
      mem_3=$(free -m | awk '/Mem:/ {print $3}')
      mem_used=$(echo "scale=2; ($mem_1+$mem_2+$mem_3)/3" | bc)
      mem_total=$(free -m | awk '/Mem:/ {print $2}')

      # Multiply before dividing: with scale=2, dividing first truncates the
      # ratio to two decimals and the percentage loses its fractional part.
      mem_rate=$(echo "scale=2; $mem_used*100/$mem_total" | bc)
      if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = 1 ]; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if [ "$limit_mine" = true ]; then
    # check my own cpu and memory usage, it should be less than
    # 1/$constrain_rate of the given cpu_mean_max / memory_rate_max
    user=${username_mine:-${username:-$(id -un)}}
    while true; do
      # Sum of per-process %cpu divided by the CPU count gives the share of
      # the whole machine, in percent.
      cpu_user_sum=$(ps -u "$user" -o %cpu | awk '{sum+=$1} END {print sum}')
      cpu_count=$(nproc)
      cpu_user_ratio=$(echo "scale=2; $cpu_user_sum/$cpu_count" | bc)

      # rss is summed and divided by 1024 so it is comparable with free -m
      mem_user_mb=$(ps -u "$user" -o rss | awk '{sum+=$1} END {print sum/1024}')
      mem_total=$(free -m | awk '/Mem:/ {print $2}')
      mem_user_ratio=$(echo "scale=2; $mem_user_mb*100/$mem_total" | bc)

      # so my ratio should be less than 1/$constrain_rate of the given threshold
      cpu_max_mine=$(echo "$cpu_mean_max/$constrain_rate" | bc)
      mem_max_mine=$(echo "$memory_rate_max/$constrain_rate" | bc)
      if [ "$(echo "$cpu_user_ratio < $cpu_max_mine" | bc)" = 1 ] && [ "$(echo "$mem_user_ratio < $mem_max_mine" | bc)" = 1 ]; then
        echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is less than 1/$constrain_rate of the given threshold for cpu: $cpu_max_mine and memory: $mem_max_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is greater than 1/$constrain_rate of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      # wrap around to the first device
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    usage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")

    if [ "$usage" -ge "$mem_limit" ] || [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done
  echo "current device: $gpu"
  device=$gpu
}
|
107 |
+
"""
|
gpu_utility.sh
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
|
2 |
-
|
3 |
max_gpu_utilization=90
|
4 |
-
|
5 |
max_gpu_memory_gap=5000
|
6 |
-
available_devices=( 0 1 2 3 4
|
7 |
current_device_idx=-1
|
8 |
-
sleeptime=
|
9 |
cpu_mean_max=77
|
10 |
memory_rate_max=80
|
11 |
-
constrain_total=
|
12 |
-
constrain_mine=
|
13 |
-
constrain_rate=
|
14 |
|
15 |
|
16 |
|
|
|
1 |
|
2 |
+
# Settings read by update_device_idx.
username=yila22
# update_device_idx takes the account for its per-user check from username_mine.
username_mine=$username
max_gpu_utilization=90
total_gpu_memory=24564
max_gpu_memory_gap=5000
available_devices=( 0 1 2 3 4 )
current_device_idx=-1
sleeptime=10
cpu_mean_max=77
memory_rate_max=80
# The flags are compared against the literal string "true", so they must be
# lowercase; "True" would silently disable the checks.
constrain_total=true
constrain_mine=true
# Numeric divisor: the per-user limits are cpu_mean_max / constrain_rate and
# memory_rate_max / constrain_rate.
constrain_rate=2
|
14 |
|
15 |
|
16 |
|
main.py
CHANGED
@@ -4,144 +4,161 @@ import streamlit as st
|
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
6 |
import time
|
7 |
-
|
8 |
-
|
9 |
-
# import plotly.figure_factory as ff
|
10 |
-
# import altair as alt
|
11 |
-
# from PIL import Image
|
12 |
-
# import base64
|
13 |
-
# import tarfile
|
14 |
-
# import os
|
15 |
-
# import requests
|
16 |
-
|
17 |
|
18 |
|
19 |
# title
|
20 |
st.title("Exp Command Generator")
|
21 |
|
22 |
# experiment mode
|
23 |
-
exp_mode = st.sidebar.selectbox("Select Experiment Mode", ["OneExpOnecard", "MultipleExpOnecard"],key="MultipleExpOnecard")
|
24 |
|
25 |
## 检查框
|
26 |
-
debug = st.sidebar.checkbox("Debug
|
27 |
# st.sidebar.write(f"checkbox的值是{res}")
|
28 |
|
29 |
-
setup = st.sidebar.text_area("Some setup of env at beginning.", """cd $(dirname $(dirname $0))
|
30 |
-
source activate xai
|
31 |
-
export PYTHONPATH=${PYTHONPATH}:/Users/apple/Desktop/workspace/research_project/attention:/mnt/yixin/:/home/yila22/prj""")
|
32 |
-
|
33 |
-
exp_hyper = st.sidebar.text_area("Hyperparameters", """exp_name="debug-adv-training-emotion"
|
34 |
-
dataset=emotion
|
35 |
-
n_epoch=3
|
36 |
-
K=3
|
37 |
-
encoder=bert
|
38 |
-
lambda_1=1
|
39 |
-
lambda_2=1
|
40 |
-
x_pgd_radius=0.01
|
41 |
-
pgd_radius=0.001
|
42 |
-
seed=2
|
43 |
-
bsize=8
|
44 |
-
lr=5e-5""")
|
45 |
|
46 |
## gpu 相关参数
|
47 |
-
gpu_list = st.sidebar.multiselect("multi select", range(10), [
|
48 |
# print(gpu_list)
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
g = st.button("Generate")
|
92 |
if g:
|
93 |
-
s = ""
|
94 |
-
s += setup + "\n\n"
|
95 |
-
s += exp_hyper + "\n\n"
|
96 |
-
s += gpu_hyper + "\n\n"
|
97 |
-
s += hyper_loop + "\n\n"
|
98 |
-
s += """
|
99 |
-
while true; do
|
100 |
-
gpu_id=${gpu[$i]}
|
101 |
-
# nvidia-smi --query-gpu=utilization.gpu --format=csv -i 2 | grep -Eo "[0-9]+"
|
102 |
-
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
103 |
-
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
104 |
-
if [[ $free_mem -lt $allow_gpu_memory_threshold || $gpu_u -ge ${gpu_threshold} ]]; then
|
105 |
-
i=`expr $i + 1`
|
106 |
-
i=`expr $i % $gpunum`
|
107 |
-
echo "gpu id ${gpu[$i]} is full loaded, skip"
|
108 |
-
if [ "$i" == "0" ]; then
|
109 |
-
sleep ${all_full_sleep_time}
|
110 |
-
echo "all the gpus are full, sleep 1m"
|
111 |
-
fi
|
112 |
-
else
|
113 |
-
break
|
114 |
-
fi
|
115 |
-
done
|
116 |
-
|
117 |
-
gpu_id=${gpu[$i]}
|
118 |
-
# search from the next gpu
|
119 |
-
i=`expr $i + 1`
|
120 |
-
i=`expr $i % $gpunum`
|
121 |
-
|
122 |
-
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
123 |
-
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
124 |
-
export CUDA_VISIBLE_DEVICES=$gpu_id
|
125 |
-
echo "use gpu id is ${gpu[$i]}, free memory is $free_mem, it utilization is ${gpu_u}%"
|
126 |
-
"""
|
127 |
-
s += f"""com="{python_cmd}"\n"""
|
128 |
-
s += "echo $com\n"
|
129 |
-
s += "echo ==========================================================================================\n"
|
130 |
-
if debug:
|
131 |
-
s += "$com\n"
|
132 |
-
s += "# mkdir -p ./logs/\n"
|
133 |
-
s += "# nohup $com > ./logs/$exp_name-$RANDOM.log 2>&1 &\n"
|
134 |
-
else:
|
135 |
-
s += "# $com\n"
|
136 |
-
s += "mkdir -p ./logs/\n"
|
137 |
-
s += "nohup $com > ./logs/$exp_name-$RANDOM.log 2>&1 &\n"
|
138 |
-
s += """echo "sleep for $sleep_time_after_loading_task to wait the task loaded"
|
139 |
-
sleep $sleep_time_after_loading_task\n"""
|
140 |
-
s += end_loop
|
141 |
st.success("Finished")
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
|
|
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
6 |
import time
|
7 |
+
from config import update_device_func
|
8 |
+
from parse_code import parse_base_code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
# title
st.title("Exp Command Generator")

# experiment mode
# exp_mode = st.sidebar.selectbox("Select Experiment Mode", ["OneExpOnecard", "MultipleExpOnecard"],key="MultipleExpOnecard")

## checkboxes
# When checked, parse_base_code emits the main command in the foreground
# (serial execution) instead of backgrounding it.
debug = st.sidebar.checkbox("Debug: 选择则会串行地执行命令", value=True)
# st.sidebar.write(f"checkbox value is {res}")

# setup = st.sidebar.text_area("Some setup of env at beginning.", """cd $(dirname $(dirname $0))
# source activate xai
# export PYTHONPATH=${PYTHONPATH}:/Users/apple/Desktop/workspace/research_project/attention:/mnt/yixin/:/home/yila22/prj""")

# exp_hyper = st.sidebar.text_area("Hyperparameters", """exp_name="debug-adv-training-emotion"
# dataset=emotion
# n_epoch=3
# K=3
# encoder=bert
# lambda_1=1
# lambda_2=1
# x_pgd_radius=0.01
# pgd_radius=0.001
# seed=2
# bsize=8
# lr=5e-5""")

## GPU-related parameters
gpu_list = st.sidebar.multiselect("multi select", range(10), [0,1,2,3,4,])
# print(gpu_list)
allow_gpu_memory_threshold_default=5000
gpu_threshold_default=90
# total memory of a single card
total_gpu_memory = st.sidebar.number_input("单卡总容量", value=24564, min_value=0, max_value=30000, step=1000)
# minimum memory that must remain free on a card
max_gpu_memory_gap = st.sidebar.number_input("最小单卡剩余容量", value=allow_gpu_memory_threshold_default, min_value=0, max_value=total_gpu_memory, step=500)
# maximum utilization of a single card
max_gpu_utilization = st.sidebar.number_input("最大单卡利用率", value=gpu_threshold_default, min_value=0, max_value=100, step=10)
# seconds to wait after launching a task
sleep_time_after_loading_task= st.sidebar.number_input("加载任务后等待秒数", value=10, min_value=0,step=5)
# all_full_sleep_time = st.sidebar.number_input("全满之后等待秒数", value=20, min_value=0,step=5)
# user name
username = st.sidebar.text_input("用户名", value="yila22")
# maximum CPU utilization
cpu_max_utility = st.sidebar.number_input("cpu最大利用率", value=77, min_value=0, max_value=100, step=1)
# maximum memory utilization
memory_max_utility = st.sidebar.number_input("内存最大利用率", value=80, min_value=0, max_value=100, step=1)
# limit total resources
constrain_total = st.sidebar.checkbox("限制总资源", value=True)
# limit my own resources
constrain_mine = st.sidebar.checkbox("限制我的资源", value=False)
# constrain rate (divisor for the per-user limits)
constrain_rate = st.sidebar.number_input("限制率", value=2, min_value=1, max_value=10, step=1)

# Example of the settings the generated gpu_utility.sh starts with:
# username_mine=root
# max_gpu_utilization=90
# total_gpu_memory=24564
# max_gpu_memory_gap=5000
# available_devices=( 0 1 2 3 4 5 6 7 8 9 )
# current_device_idx=-1
# sleeptime=30
# cpu_mean_max=77
# memory_rate_max=80
# constrain_total=true
# constrain_mine=false
# constrain_rate=2
import shlex

gpu_list = " ".join([str(i) for i in gpu_list])
# The user name is free text that ends up in a shell script, so quote it.
username_sh = shlex.quote(username)
# The shell side compares the flags against the literal "true"; Python's
# str(True) is "True", so the booleans are lower-cased before interpolation.
setup_for_gpu_utility = f"""
username={username_sh}
username_mine={username_sh}
max_gpu_utilization={max_gpu_utilization}
total_gpu_memory={total_gpu_memory}
max_gpu_memory_gap={max_gpu_memory_gap}
available_devices=( {gpu_list} )
current_device_idx=-1
sleeptime={sleep_time_after_loading_task}
cpu_mean_max={cpu_max_utility}
memory_rate_max={memory_max_utility}
constrain_total={str(constrain_total).lower()}
constrain_mine={str(constrain_mine).lower()}
constrain_rate={constrain_rate}
"""


base_code = st.text_area("Base Code", """##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

##### main
python single_user.py --dataset_idx $dataset_idx --trigger_size 1 --target 0 \
--loc 0 --batch_size 16 --num_epochs 2 --poison_method $poison_method --lr 5e-5 --pattern 0 --exp_name $exp_name \
--log_wb
#####

#####
done;done;
#####""", height=400)


g = st.button("Generate")
if g:
    contents = base_code
    gpu_utility = setup_for_gpu_utility + "\n\n" + update_device_func

    new_code = parse_base_code(contents, debug=debug)

    # keep a timestamped copy of both generated files on disk
    timestr = time.strftime("%Y%m%d-%Hh%Mm%Ss")
    import os
    os.makedirs(f"./res/{timestr}", exist_ok=True)
    filename_script = f"./res/{timestr}/script.sh"
    with open(filename_script, "w") as f:
        f.write(new_code)
    filename_config = f"./res/{timestr}/gpu_utility.sh"
    with open(filename_config, "w") as f:
        f.write(gpu_utility)

    # report success only after both files have been written
    st.success("Finished")

    # zip them into one file
    # import shutil
    # shutil.make_archive(f"./res/{timestr}", 'zip', f"./res/{timestr}")
    # st.download_button(
    #     label="Download zip",
    #     data=f"./res/{timestr}.zip",
    #     file_name=f"{timestr}.zip",
    #     mime="application/zip",
    # )

    # file_name is the name offered to the browser, so pass a bare file name
    # rather than the server-side path.
    st.download_button(
        label="Download script",
        data=new_code,
        file_name=os.path.basename(filename_script),
        mime="text/plain",
    )
    # after clicking i don't want the website to refresh
    st.download_button(
        label="Download gpu_utility.sh",
        data=gpu_utility,
        file_name=os.path.basename(filename_config),
        mime="text/plain",
    )

    # st.markdown(f"### [Download script](./{filename_script})")
    # st.markdown(f"### [Download gpu_utility.sh](P{filename_config})")
    st.markdown("## script.sh")
    st.code(new_code, language="shell")

    st.markdown("## gpu_utility.sh")
    st.code(gpu_utility, language="shell")
|
161 |
+
|
162 |
+
|
163 |
|
164 |
|
parse_code.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from config import update_device_command
|
2 |
+
|
3 |
+
def parse_base_code(contents, debug = False):
    """Turn annotated base code into a shell script with device control.

    ``contents`` is split at every ``#####`` marker. Each piece runs from one
    marker up to the next; text before the first marker and after the last
    one is dropped. A piece is typed by the tag on its marker line:

    * ``setup`` - emitted after a line that changes into the script's
      directory and sources ``gpu_utility.sh``.
    * ``loop``  - emitted unchanged.
    * ``main``  - the marker line is removed and the rest becomes the command.
      ``update_device_command`` is emitted before it and
      ``sleep $sleeptime`` after it.
    * anything else - emitted unchanged.

    Args:
        contents: Annotated base code; must contain an even number of
            ``#####`` markers.
        debug: When true the command is emitted as-is and runs in the
            foreground. Otherwise it is stored in ``$command`` and started in
            the background with ``eval $command &``.

    Returns:
        The generated script as a single string.

    Raises:
        AssertionError: If the number of ``#####`` markers is odd.
    """
    import re

    marker_positions = [m.start() for m in re.finditer('#####', contents)]
    assert len(marker_positions) % 2 == 0

    # Each span starts at a marker and ends right before the following one.
    spans = [
        contents[start:end]
        for start, end in zip(marker_positions, marker_positions[1:])
    ]

    def span_type(span):
        # Only the marker line carries the tag. Searching the whole span
        # would misclassify e.g. a main command that mentions "setup" or
        # "loop" somewhere in its arguments.
        header = span.split("\n", 1)[0]
        if "setup" in header:
            return "setup"
        if "loop" in header:
            return "loop"
        if "main" in header:
            return "command"
        return "other"

    pieces = []
    for span in spans:
        kind = span_type(span)
        if kind == "setup":
            pieces.append("""cd $(cd "$(dirname "$0")";pwd); source gpu_utility.sh\n\n""")
            pieces.append(span)
        elif kind == "command":
            pieces.append(update_device_command)
            # Drop the marker line but keep the newline that follows it; a
            # marker line without a newline simply leaves an empty command.
            _, newline, rest = span.partition("\n")
            command_body = newline + rest
            if not debug:
                # NOTE(review): the command is wrapped in double quotes and
                # re-parsed by eval, so a command that itself contains double
                # quotes will not survive this round trip.
                pieces.append(f"\n\ncommand=\"\"\"{command_body}\"\"\"\n")
                pieces.append("eval $command &" + "\n\n\n")
            else:
                pieces.append(f"{command_body}\n")
            pieces.append("sleep $sleeptime\n\n")
        else:
            # "loop" and untyped spans pass through unchanged
            pieces.append(span)

    return "".join(pieces)
|
res/20230615-17h44m58s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings read by update_device_idx below.
username=yila22
# update_device_idx takes the account for its per-user check from username_mine.
username_mine=$username
max_gpu_utilization=90
total_gpu_memory=24564
max_gpu_memory_gap=5000
available_devices=( 0 1 2 3 4 )
current_device_idx=-1
sleeptime=10
cpu_mean_max=77
memory_rate_max=80
# The flags are compared against the literal string "true", so they must be
# lowercase; "True" would silently disable the checks.
constrain_total=true
constrain_mine=true
# Numeric divisor: the per-user limits are cpu_mean_max / constrain_rate and
# memory_rate_max / constrain_rate.
constrain_rate=2
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
# Block until the host has spare capacity, then select the next usable GPU.
#
# Globals read:
#   constrain_total     - "true" enables the system-wide CPU / memory gate
#   cpu_mean_max        - system-wide CPU usage threshold (percent)
#   memory_rate_max     - system-wide memory usage threshold (percent)
#   constrain_mine      - "true" enables the per-user CPU / memory gate
#   constrain_rate      - divisor applied to both thresholds for the per-user gate
#   username_mine       - account checked by the per-user gate (falls back to
#                         username, then to the current user)
#   available_devices   - array of GPU ids to rotate through
#   total_gpu_memory    - per-GPU memory total (falls back to total_aviable)
#   max_gpu_memory_gap  - memory that must stay free on a GPU
#   max_gpu_utilization - a GPU at or above this utilization is skipped
# Globals written:
#   current_device_idx  - index into available_devices of the selected GPU
#   device              - id of the selected GPU
# Returns: 0 once a GPU is selected, 1 when the configuration is unusable.
# External tools: mpstat, free, ps, nproc, bc, awk, nvidia-smi.
function update_device_idx {
  local limit_total=false limit_mine=false
  local cpu_avg_awk cpu_1 cpu_2 cpu_3 cpu_mean
  local mem_1 mem_2 mem_3 mem_used mem_total mem_rate
  local user cpu_user_sum cpu_count cpu_user_ratio
  local mem_user_mb mem_user_ratio cpu_max_mine mem_max_mine
  local total_mem mem_limit gpu usage utilization

  # The settings above may carry Python-style booleans (True/False), so accept
  # the common spellings instead of only the literal "true".
  case "$constrain_total" in true|True|TRUE|1) limit_total=true ;; esac
  case "$constrain_mine" in true|True|TRUE|1) limit_mine=true ;; esac

  # Fail fast on a configuration that could never select a device.
  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "update_device_idx: available_devices is empty" >&2
    return 1
  fi
  total_mem=${total_gpu_memory:-$total_aviable}
  if [ -z "$total_mem" ]; then
    echo "update_device_idx: total_gpu_memory is not set" >&2
    return 1
  fi
  mem_limit=$((total_mem - max_gpu_memory_gap))

  if [ "$limit_total" = true ]; then
    # check total cpu usage: mean of three mpstat samples, each averaged over all CPUs
    cpu_avg_awk='/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }'
    while true; do
      cpu_1=$(mpstat -P ALL 1 1 | awk "$cpu_avg_awk")
      sleep 1
      cpu_2=$(mpstat -P ALL 1 1 | awk "$cpu_avg_awk")
      sleep 1
      cpu_3=$(mpstat -P ALL 1 1 | awk "$cpu_avg_awk")
      cpu_mean=$(echo "scale=2; ($cpu_1+$cpu_2+$cpu_3)/3" | bc)

      # if currently cpu usage is less than the threshold, then break
      if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = 1 ]; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage: mean of three samples of the "used" column
    while true; do
      mem_1=$(free -m | awk '/Mem:/ {print $3}')
      sleep 1
      mem_2=$(free -m | awk '/Mem:/ {print $3}')
      sleep 1
      mem_3=$(free -m | awk '/Mem:/ {print $3}')
      mem_used=$(echo "scale=2; ($mem_1+$mem_2+$mem_3)/3" | bc)
      mem_total=$(free -m | awk '/Mem:/ {print $2}')

      # Multiply before dividing: with scale=2, dividing first truncates the
      # ratio to two decimals and the percentage loses its fractional part.
      mem_rate=$(echo "scale=2; $mem_used*100/$mem_total" | bc)
      if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = 1 ]; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if [ "$limit_mine" = true ]; then
    # check my own cpu and memory usage, it should be less than
    # 1/$constrain_rate of the given cpu_mean_max / memory_rate_max
    user=${username_mine:-${username:-$(id -un)}}
    while true; do
      # Sum of per-process %cpu divided by the CPU count gives the share of
      # the whole machine, in percent.
      cpu_user_sum=$(ps -u "$user" -o %cpu | awk '{sum+=$1} END {print sum}')
      cpu_count=$(nproc)
      cpu_user_ratio=$(echo "scale=2; $cpu_user_sum/$cpu_count" | bc)

      # rss is summed and divided by 1024 so it is comparable with free -m
      mem_user_mb=$(ps -u "$user" -o rss | awk '{sum+=$1} END {print sum/1024}')
      mem_total=$(free -m | awk '/Mem:/ {print $2}')
      mem_user_ratio=$(echo "scale=2; $mem_user_mb*100/$mem_total" | bc)

      # so my ratio should be less than 1/$constrain_rate of the given threshold
      cpu_max_mine=$(echo "$cpu_mean_max/$constrain_rate" | bc)
      mem_max_mine=$(echo "$memory_rate_max/$constrain_rate" | bc)
      if [ "$(echo "$cpu_user_ratio < $cpu_max_mine" | bc)" = 1 ] && [ "$(echo "$mem_user_ratio < $mem_max_mine" | bc)" = 1 ]; then
        echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is less than 1/$constrain_rate of the given threshold for cpu: $cpu_max_mine and memory: $mem_max_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is greater than 1/$constrain_rate of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      # wrap around to the first device
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    usage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")

    if [ "$usage" -ge "$mem_limit" ] || [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done
  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h44m58s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Run from the directory that holds this script so gpu_utility.sh is found
# regardless of where the script is started from.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source ./gpu_utility.sh || exit 1

##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

# Wait for free resources and pick the next GPU; the choice is left in $device.
update_device_idx

# Start the job in the background. It is invoked directly rather than through
# a quoted string plus eval, so arguments are parsed exactly once.
# NOTE(review): the job is pinned to the GPU selected above; without this the
# value exported in the setup section would apply to every job. Confirm this
# is the intended use of $device.
CUDA_VISIBLE_DEVICES="$device" python single_user.py \
  --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 \
  --batch_size 16 --num_epochs 2 --poison_method "$poison_method" \
  --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb &

# Wait before the next resource check and launch.
sleep "$sleeptime"

#####

#####
done;done;
|
res/20230615-17h45m38s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings read by update_device_idx below.
username=yila22
# update_device_idx takes the account for its per-user check from username_mine.
username_mine=$username
max_gpu_utilization=90
total_gpu_memory=24564
max_gpu_memory_gap=5000
available_devices=( 0 1 2 3 4 )
current_device_idx=-1
sleeptime=10
cpu_mean_max=77
memory_rate_max=80
# The flags are compared against the literal string "true", so they must be
# lowercase; "True" would silently disable the checks.
constrain_total=true
constrain_mine=true
# Numeric divisor: the per-user limits are cpu_mean_max / constrain_rate and
# memory_rate_max / constrain_rate.
constrain_rate=2
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
function update_device_idx {
|
18 |
+
if [ $constrain_total = true ]; then
|
19 |
+
# check total cpu usage
|
20 |
+
while true; do
|
21 |
+
cpu_mean_1=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
22 |
+
sleep 1
|
23 |
+
cpu_mean_2=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
24 |
+
sleep 1
|
25 |
+
cpu_mean_3=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
26 |
+
cpu_mean=$(echo "scale=2; ($cpu_mean_1+$cpu_mean_2+$cpu_mean_3)/3" | bc)
|
27 |
+
|
28 |
+
# if currently cpu usage is less than the threshold, then break
|
29 |
+
if [ $(echo "$cpu_mean < $cpu_mean_max" | bc) -eq 1 ]; then
|
30 |
+
echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
|
31 |
+
break
|
32 |
+
else
|
33 |
+
echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
|
34 |
+
sleep 10
|
35 |
+
fi
|
36 |
+
done;
|
37 |
+
|
38 |
+
# check total memory usage
|
39 |
+
while true; do
|
40 |
+
# get memory usage of whole system
|
41 |
+
mem_used_1=$(free -m | awk '/Mem:/ {print $3}')
|
42 |
+
sleep 1
|
43 |
+
mem_used_2=$(free -m | awk '/Mem:/ {print $3}')
|
44 |
+
sleep 1
|
45 |
+
mem_used_3=$(free -m | awk '/Mem:/ {print $3}')
|
46 |
+
mem_used=$(echo "scale=2; ($mem_used_1+$mem_used_2+$mem_used_3)/3" | bc)
|
47 |
+
|
48 |
+
# echo $mem_used
|
49 |
+
# get rate of memory usage
|
50 |
+
mem_rate=$(echo "scale=2; $mem_used/$(free -m | awk '/Mem:/ {print $2}')*100" | bc)
|
51 |
+
# echo $mem_rate
|
52 |
+
if [ $(echo "$mem_rate < $memory_rate_max" | bc) -eq 1 ]; then
|
53 |
+
echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
|
54 |
+
break
|
55 |
+
else
|
56 |
+
echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
|
57 |
+
sleep 10
|
58 |
+
fi
|
59 |
+
done;
|
60 |
+
fi;
|
61 |
+
|
62 |
+
# if constrain_mine
|
63 |
+
if [ $constrain_mine = true ]; then
|
64 |
+
|
65 |
+
# check my own cpu and memory usage, it should be less than 1/$constrain_rate of the given cpu_mean_max / memory_rate_max
|
66 |
+
while true; do
|
67 |
+
username=$username_mine
|
68 |
+
cpu_usage_user_sum=$(ps -u $username -o %cpu | awk '{sum+=$1} END {print sum}')
|
69 |
+
# echo $cpu_usage_user_sum
|
70 |
+
total_aviable_cpu=$(nproc)
|
71 |
+
total_aviable_cpu=$(echo "$total_aviable_cpu*100" | bc)
|
72 |
+
# echo $total_aviable_cpu
|
73 |
+
cpu_usage_user_ratio=$(echo "scale=2; $cpu_usage_user_sum/$total_aviable_cpu*100" | bc)
|
74 |
+
# echo $cpu_usage_user_ratio
|
75 |
+
|
76 |
+
memory_usage_user_sum=$(ps -u $username -o rss | awk '{sum+=$1} END {print sum/1024}')
|
77 |
+
# echo $memory_usage_user_sum
|
78 |
+
memory_usage_total=$(free -m | awk '/Mem:/ {print $2}')
|
79 |
+
# echo $memory_usage_total
|
80 |
+
memory_usage_user_ratio=$(echo "scale=2; $memory_usage_user_sum/$memory_usage_total*100" | bc)
|
81 |
+
# echo $memory_usage_user_ratio
|
82 |
+
|
83 |
+
# so my ratio should be less than 1/$constrain_rate of the given threshold
|
84 |
+
cpu_mean_max_mine=$(echo "$cpu_mean_max/$constrain_rate" | bc)
|
85 |
+
memory_rate_max_mine=$(echo "$memory_rate_max/$constrain_rate" | bc)
|
86 |
+
if [ $(echo "$cpu_usage_user_ratio < $cpu_mean_max_mine" | bc) -eq 1 ] && [ $(echo "$memory_usage_user_ratio < $memory_rate_max_mine" | bc) -eq 1 ]; then
|
87 |
+
echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
|
88 |
+
break
|
89 |
+
else
|
90 |
+
echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
|
91 |
+
sleep 10
|
92 |
+
fi
|
93 |
+
done;
|
94 |
+
fi;
|
95 |
+
|
96 |
+
# so all the conditions are satisfied, we can update the device idx and run the next experiment
|
97 |
+
while true; do
|
98 |
+
current_device_idx=$((current_device_idx+1))
|
99 |
+
if [ $current_device_idx -ge ${#available_devices[@]} ]; then
|
100 |
+
# reset
|
101 |
+
current_device_idx=0
|
102 |
+
fi
|
103 |
+
# check whether this device is fully booked using nvidia-smi
|
104 |
+
# get the gpu current memory usage
|
105 |
+
useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
|
106 |
+
utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
|
107 |
+
|
108 |
+
if [ $useage -ge $((total_aviable-max_gpu_memory_gap)) ] || [ $utilization -ge $max_gpu_utilization ]; then
|
109 |
+
echo "device ${available_devices[$current_device_idx]} is fully booked, try next one"
|
110 |
+
sleep 3
|
111 |
+
continue
|
112 |
+
else
|
113 |
+
break
|
114 |
+
fi
|
115 |
+
done
|
116 |
+
echo "current device: ${available_devices[$current_device_idx]}"
|
117 |
+
device=${available_devices[$current_device_idx]}
|
118 |
+
}
|
res/20230615-17h45m38s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Launches one single_user.py run per (poison_method, dataset_idx) pair.
# update_device_idx (from gpu_utility.sh) is called before every launch; it
# blocks until the machine has head-room and stores the chosen GPU in $device.

# Work from the directory holding this script so gpu_utility.sh and
# single_user.py are found; stop early if either step fails.
cd -- "$(dirname -- "$0")" || exit 1
source ./gpu_utility.sh || exit 1

##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    #####

    update_device_idx

    # Run on the GPU update_device_idx just picked; the exported value above is
    # only the fallback when no device was selected. The command is invoked
    # directly (no string + eval) so every argument stays one word.
    CUDA_VISIBLE_DEVICES="${device:-$CUDA_VISIBLE_DEVICES}" \
      python single_user.py \
        --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 \
        --batch_size 16 --num_epochs 2 --poison_method "$poison_method" \
        --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb &

    # Give the job time to claim its resources before probing again.
    sleep "${sleeptime:-10}"

    #####
  done
done

# Keep the script alive until every background run has finished.
wait
|
res/20230615-17h46m42s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# gpu_utility.sh -- sourced by script.sh. Defines the settings below and
# update_device_idx, which waits for CPU/RAM head-room and picks the next GPU.

username=yila22                  # account whose processes count as "mine"
max_gpu_utilization=90           # a GPU at or above this utilisation (%) is skipped
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used (MiB)
max_gpu_memory_gap=5000          # a GPU with less than this much memory left is skipped
available_devices=( 0 1 2 3 4 )  # GPU ids to rotate through
current_device_idx=-1            # index of the last GPU handed out; -1 = none yet
sleeptime=10                     # pause script.sh takes between launches (seconds)
cpu_mean_max=77                  # system-wide CPU usage limit (%)
memory_rate_max=80               # system-wide RAM usage limit (%)
constrain_total=True             # enable the system-wide CPU/RAM checks
constrain_mine=True              # enable the per-user CPU/RAM checks
constrain_rate=True              # divisor for the per-user limits; not a number as
                                 # generated, so update_device_idx falls back to 2

# Succeed when $1 is an enabled flag. The generated settings use "True", which
# a plain `[ $flag = true ]` comparison never matches.
_gpu_flag_on() {
  case "$1" in
    [Tt][Rr][Uu][Ee] | 1) return 0 ;;
  esac
  return 1
}

# Succeed when $1 is a number strictly below $2 (floating point comparison).
_gpu_below() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a ~ /^[0-9.]+$/ && a + 0 < b + 0) }'
}

# Print the system-wide CPU usage (%) averaged over three 1-second mpstat
# samples; prints nothing when no per-CPU "Average:" line could be parsed.
_gpu_total_cpu() {
  local i
  for i in 1 2 3; do
    mpstat -P ALL 1 1
    if [ "$i" -lt 3 ]; then sleep 1; fi
  done | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
              END { if (count) printf "%.2f\n", total / count }'
}

# Print the system-wide RAM usage (%) averaged over three `free -m` samples.
_gpu_total_mem() {
  local i
  for i in 1 2 3; do
    free -m
    if [ "$i" -lt 3 ]; then sleep 1; fi
  done | awk '/Mem:/ { used += $3; total = $2; n++ }
              END { if (n && total) printf "%.2f\n", used / n / total * 100 }'
}

# Print the summed %CPU of user $1 as a share (%) of all cores.
_gpu_user_cpu() {
  ps -u "$1" -o %cpu \
    | awk -v cores="$(nproc)" '{ sum += $1 }
        END { if (cores > 0) printf "%.2f\n", sum / cores }'
}

# Print the summed RSS of user $1 as a share (%) of total RAM.
_gpu_user_mem() {
  local total_mb
  total_mb=$(free -m | awk '/Mem:/ { print $2 }')
  ps -u "$1" -o rss \
    | awk -v total="$total_mb" '{ kb += $1 }
        END { if (total > 0) printf "%.2f\n", kb / 1024 / total * 100 }'
}

#######################################
# Block until the enabled CPU/RAM limits are met, then select the next GPU
# (round-robin over available_devices) that is not fully booked.
# Globals:
#   read:    constrain_total constrain_mine constrain_rate cpu_mean_max
#            memory_rate_max username username_mine (optional override)
#            available_devices total_gpu_memory max_gpu_memory_gap
#            max_gpu_utilization
#   written: current_device_idx, device
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 once a device is selected; 1 when nvidia-smi is missing, the
#            device list is empty, or no device gives a usable reading
#######################################
function update_device_idx {
  local cpu_mean mem_rate user rate
  local cpu_usage_user_ratio memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local useage utilization gpu unreadable=0

  if _gpu_flag_on "$constrain_total"; then
    # check total cpu usage
    while true; do
      cpu_mean=$(_gpu_total_cpu)
      if [ -z "$cpu_mean" ]; then
        # Without a reading the loop could never finish; skip instead.
        echo "cannot measure total cpu usage, skipping this check" >&2
        break
      elif _gpu_below "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage
    while true; do
      mem_rate=$(_gpu_total_mem)
      if [ -z "$mem_rate" ]; then
        echo "cannot measure total memory usage, skipping this check" >&2
        break
      elif _gpu_below "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_flag_on "$constrain_mine"; then
    # Use a local name: assigning the (normally unset) username_mine to the
    # global username used to wipe the configured account.
    user=${username_mine:-${username:-$(id -un)}}

    # My share must stay below threshold / constrain_rate. Fall back to 2 when
    # the rate is not a positive number; the messages below speak of "half".
    rate=$constrain_rate
    if ! [[ $rate =~ ^[0-9]+([.][0-9]+)?$ ]] || ! _gpu_below 0 "$rate"; then
      rate=2
    fi
    read -r cpu_mean_max_mine memory_rate_max_mine < <(
      awk -v c="$cpu_mean_max" -v m="$memory_rate_max" -v r="$rate" \
        'BEGIN { printf "%.2f %.2f\n", c / r, m / r }'
    )

    while true; do
      cpu_usage_user_ratio=$(_gpu_user_cpu "$user")
      memory_usage_user_ratio=$(_gpu_user_mem "$user")
      if [ -z "$cpu_usage_user_ratio" ] || [ -z "$memory_usage_user_ratio" ]; then
        echo "cannot measure usage of user $user, skipping this check" >&2
        break
      elif _gpu_below "$cpu_usage_user_ratio" "$cpu_mean_max_mine" \
        && _gpu_below "$memory_usage_user_ratio" "$memory_rate_max_mine"; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  if ! command -v nvidia-smi > /dev/null; then
    echo "nvidia-smi not found, cannot pick a device" >&2
    return 1
  fi
  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "available_devices is empty, cannot pick a device" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    useage=${useage//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    if ! [[ $useage =~ ^[0-9]+$ && $utilization =~ ^[0-9]+$ ]]; then
      # Give up after one full pass without a usable reading instead of
      # spinning forever.
      unreadable=$((unreadable + 1))
      if [ "$unreadable" -ge "${#available_devices[@]}" ]; then
        echo "nvidia-smi returned no usable reading for any device" >&2
        return 1
      fi
      continue
    fi
    unreadable=0

    # The limit is total_gpu_memory - max_gpu_memory_gap. The undefined name
    # total_aviable used here before evaluated to 0, so every device looked
    # fully booked and this loop never ended.
    if [ "$useage" -ge $((total_gpu_memory - max_gpu_memory_gap)) ] \
      || [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h46m42s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Launches one single_user.py run per (poison_method, dataset_idx) pair.
# update_device_idx (from gpu_utility.sh) is called before every launch; it
# blocks until the machine has head-room and stores the chosen GPU in $device.

# Work from the directory holding this script so gpu_utility.sh and
# single_user.py are found; stop early if either step fails.
cd -- "$(dirname -- "$0")" || exit 1
source ./gpu_utility.sh || exit 1

##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    #####

    update_device_idx

    # Run on the GPU update_device_idx just picked; the exported value above is
    # only the fallback when no device was selected. The command is invoked
    # directly (no string + eval) so every argument stays one word.
    CUDA_VISIBLE_DEVICES="${device:-$CUDA_VISIBLE_DEVICES}" \
      python single_user.py \
        --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 \
        --batch_size 16 --num_epochs 2 --poison_method "$poison_method" \
        --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb &

    # Give the job time to claim its resources before probing again.
    sleep "${sleeptime:-10}"

    #####
  done
done

# Keep the script alive until every background run has finished.
wait
|
res/20230615-17h48m58s.zip
ADDED
Binary file (1.9 kB). View file
|
|
res/20230615-17h48m58s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# gpu_utility.sh -- sourced by script.sh. Defines the settings below and
# update_device_idx, which waits for CPU/RAM head-room and picks the next GPU.

username=yila22                  # account whose processes count as "mine"
max_gpu_utilization=90           # a GPU at or above this utilisation (%) is skipped
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used (MiB)
max_gpu_memory_gap=5000          # a GPU with less than this much memory left is skipped
available_devices=( 0 1 2 3 4 )  # GPU ids to rotate through
current_device_idx=-1            # index of the last GPU handed out; -1 = none yet
sleeptime=10                     # pause script.sh takes between launches (seconds)
cpu_mean_max=77                  # system-wide CPU usage limit (%)
memory_rate_max=80               # system-wide RAM usage limit (%)
constrain_total=True             # enable the system-wide CPU/RAM checks
constrain_mine=True              # enable the per-user CPU/RAM checks
constrain_rate=True              # divisor for the per-user limits; not a number as
                                 # generated, so update_device_idx falls back to 2

# Succeed when $1 is an enabled flag. The generated settings use "True", which
# a plain `[ $flag = true ]` comparison never matches.
_gpu_flag_on() {
  case "$1" in
    [Tt][Rr][Uu][Ee] | 1) return 0 ;;
  esac
  return 1
}

# Succeed when $1 is a number strictly below $2 (floating point comparison).
_gpu_below() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a ~ /^[0-9.]+$/ && a + 0 < b + 0) }'
}

# Print the system-wide CPU usage (%) averaged over three 1-second mpstat
# samples; prints nothing when no per-CPU "Average:" line could be parsed.
_gpu_total_cpu() {
  local i
  for i in 1 2 3; do
    mpstat -P ALL 1 1
    if [ "$i" -lt 3 ]; then sleep 1; fi
  done | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
              END { if (count) printf "%.2f\n", total / count }'
}

# Print the system-wide RAM usage (%) averaged over three `free -m` samples.
_gpu_total_mem() {
  local i
  for i in 1 2 3; do
    free -m
    if [ "$i" -lt 3 ]; then sleep 1; fi
  done | awk '/Mem:/ { used += $3; total = $2; n++ }
              END { if (n && total) printf "%.2f\n", used / n / total * 100 }'
}

# Print the summed %CPU of user $1 as a share (%) of all cores.
_gpu_user_cpu() {
  ps -u "$1" -o %cpu \
    | awk -v cores="$(nproc)" '{ sum += $1 }
        END { if (cores > 0) printf "%.2f\n", sum / cores }'
}

# Print the summed RSS of user $1 as a share (%) of total RAM.
_gpu_user_mem() {
  local total_mb
  total_mb=$(free -m | awk '/Mem:/ { print $2 }')
  ps -u "$1" -o rss \
    | awk -v total="$total_mb" '{ kb += $1 }
        END { if (total > 0) printf "%.2f\n", kb / 1024 / total * 100 }'
}

#######################################
# Block until the enabled CPU/RAM limits are met, then select the next GPU
# (round-robin over available_devices) that is not fully booked.
# Globals:
#   read:    constrain_total constrain_mine constrain_rate cpu_mean_max
#            memory_rate_max username username_mine (optional override)
#            available_devices total_gpu_memory max_gpu_memory_gap
#            max_gpu_utilization
#   written: current_device_idx, device
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 once a device is selected; 1 when nvidia-smi is missing, the
#            device list is empty, or no device gives a usable reading
#######################################
function update_device_idx {
  local cpu_mean mem_rate user rate
  local cpu_usage_user_ratio memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local useage utilization gpu unreadable=0

  if _gpu_flag_on "$constrain_total"; then
    # check total cpu usage
    while true; do
      cpu_mean=$(_gpu_total_cpu)
      if [ -z "$cpu_mean" ]; then
        # Without a reading the loop could never finish; skip instead.
        echo "cannot measure total cpu usage, skipping this check" >&2
        break
      elif _gpu_below "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage
    while true; do
      mem_rate=$(_gpu_total_mem)
      if [ -z "$mem_rate" ]; then
        echo "cannot measure total memory usage, skipping this check" >&2
        break
      elif _gpu_below "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_flag_on "$constrain_mine"; then
    # Use a local name: assigning the (normally unset) username_mine to the
    # global username used to wipe the configured account.
    user=${username_mine:-${username:-$(id -un)}}

    # My share must stay below threshold / constrain_rate. Fall back to 2 when
    # the rate is not a positive number; the messages below speak of "half".
    rate=$constrain_rate
    if ! [[ $rate =~ ^[0-9]+([.][0-9]+)?$ ]] || ! _gpu_below 0 "$rate"; then
      rate=2
    fi
    read -r cpu_mean_max_mine memory_rate_max_mine < <(
      awk -v c="$cpu_mean_max" -v m="$memory_rate_max" -v r="$rate" \
        'BEGIN { printf "%.2f %.2f\n", c / r, m / r }'
    )

    while true; do
      cpu_usage_user_ratio=$(_gpu_user_cpu "$user")
      memory_usage_user_ratio=$(_gpu_user_mem "$user")
      if [ -z "$cpu_usage_user_ratio" ] || [ -z "$memory_usage_user_ratio" ]; then
        echo "cannot measure usage of user $user, skipping this check" >&2
        break
      elif _gpu_below "$cpu_usage_user_ratio" "$cpu_mean_max_mine" \
        && _gpu_below "$memory_usage_user_ratio" "$memory_rate_max_mine"; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  if ! command -v nvidia-smi > /dev/null; then
    echo "nvidia-smi not found, cannot pick a device" >&2
    return 1
  fi
  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "available_devices is empty, cannot pick a device" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    useage=${useage//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    if ! [[ $useage =~ ^[0-9]+$ && $utilization =~ ^[0-9]+$ ]]; then
      # Give up after one full pass without a usable reading instead of
      # spinning forever.
      unreadable=$((unreadable + 1))
      if [ "$unreadable" -ge "${#available_devices[@]}" ]; then
        echo "nvidia-smi returned no usable reading for any device" >&2
        return 1
      fi
      continue
    fi
    unreadable=0

    # The limit is total_gpu_memory - max_gpu_memory_gap. The undefined name
    # total_aviable used here before evaluated to 0, so every device looked
    # fully booked and this loop never ended.
    if [ "$useage" -ge $((total_gpu_memory - max_gpu_memory_gap)) ] \
      || [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h48m58s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Launches one single_user.py run per (poison_method, dataset_idx) pair.
# update_device_idx (from gpu_utility.sh) is called before every launch; it
# blocks until the machine has head-room and stores the chosen GPU in $device.

# Work from the directory holding this script so gpu_utility.sh and
# single_user.py are found; stop early if either step fails.
cd -- "$(dirname -- "$0")" || exit 1
source ./gpu_utility.sh || exit 1

##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    #####

    update_device_idx

    # Run on the GPU update_device_idx just picked; the exported value above is
    # only the fallback when no device was selected. The command is invoked
    # directly (no string + eval) so every argument stays one word.
    CUDA_VISIBLE_DEVICES="${device:-$CUDA_VISIBLE_DEVICES}" \
      python single_user.py \
        --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 \
        --batch_size 16 --num_epochs 2 --poison_method "$poison_method" \
        --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb &

    # Give the job time to claim its resources before probing again.
    sleep "${sleeptime:-10}"

    #####
  done
done

# Keep the script alive until every background run has finished.
wait
|
res/20230615-17h49m08s.zip
ADDED
Binary file (1.9 kB). View file
|
|
res/20230615-17h49m08s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# gpu_utility.sh -- sourced by script.sh. Defines the settings below and
# update_device_idx, which waits for CPU/RAM head-room and picks the next GPU.

username=yila22                  # account whose processes count as "mine"
max_gpu_utilization=90           # a GPU at or above this utilisation (%) is skipped
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used (MiB)
max_gpu_memory_gap=5000          # a GPU with less than this much memory left is skipped
available_devices=( 0 1 2 3 4 )  # GPU ids to rotate through
current_device_idx=-1            # index of the last GPU handed out; -1 = none yet
sleeptime=10                     # pause script.sh takes between launches (seconds)
cpu_mean_max=77                  # system-wide CPU usage limit (%)
memory_rate_max=80               # system-wide RAM usage limit (%)
constrain_total=True             # enable the system-wide CPU/RAM checks
constrain_mine=True              # enable the per-user CPU/RAM checks
constrain_rate=True              # divisor for the per-user limits; not a number as
                                 # generated, so update_device_idx falls back to 2

# Succeed when $1 is an enabled flag. The generated settings use "True", which
# a plain `[ $flag = true ]` comparison never matches.
_gpu_flag_on() {
  case "$1" in
    [Tt][Rr][Uu][Ee] | 1) return 0 ;;
  esac
  return 1
}

# Succeed when $1 is a number strictly below $2 (floating point comparison).
_gpu_below() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a ~ /^[0-9.]+$/ && a + 0 < b + 0) }'
}

# Print the system-wide CPU usage (%) averaged over three 1-second mpstat
# samples; prints nothing when no per-CPU "Average:" line could be parsed.
_gpu_total_cpu() {
  local i
  for i in 1 2 3; do
    mpstat -P ALL 1 1
    if [ "$i" -lt 3 ]; then sleep 1; fi
  done | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
              END { if (count) printf "%.2f\n", total / count }'
}

# Print the system-wide RAM usage (%) averaged over three `free -m` samples.
_gpu_total_mem() {
  local i
  for i in 1 2 3; do
    free -m
    if [ "$i" -lt 3 ]; then sleep 1; fi
  done | awk '/Mem:/ { used += $3; total = $2; n++ }
              END { if (n && total) printf "%.2f\n", used / n / total * 100 }'
}

# Print the summed %CPU of user $1 as a share (%) of all cores.
_gpu_user_cpu() {
  ps -u "$1" -o %cpu \
    | awk -v cores="$(nproc)" '{ sum += $1 }
        END { if (cores > 0) printf "%.2f\n", sum / cores }'
}

# Print the summed RSS of user $1 as a share (%) of total RAM.
_gpu_user_mem() {
  local total_mb
  total_mb=$(free -m | awk '/Mem:/ { print $2 }')
  ps -u "$1" -o rss \
    | awk -v total="$total_mb" '{ kb += $1 }
        END { if (total > 0) printf "%.2f\n", kb / 1024 / total * 100 }'
}

#######################################
# Block until the enabled CPU/RAM limits are met, then select the next GPU
# (round-robin over available_devices) that is not fully booked.
# Globals:
#   read:    constrain_total constrain_mine constrain_rate cpu_mean_max
#            memory_rate_max username username_mine (optional override)
#            available_devices total_gpu_memory max_gpu_memory_gap
#            max_gpu_utilization
#   written: current_device_idx, device
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 once a device is selected; 1 when nvidia-smi is missing, the
#            device list is empty, or no device gives a usable reading
#######################################
function update_device_idx {
  local cpu_mean mem_rate user rate
  local cpu_usage_user_ratio memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local useage utilization gpu unreadable=0

  if _gpu_flag_on "$constrain_total"; then
    # check total cpu usage
    while true; do
      cpu_mean=$(_gpu_total_cpu)
      if [ -z "$cpu_mean" ]; then
        # Without a reading the loop could never finish; skip instead.
        echo "cannot measure total cpu usage, skipping this check" >&2
        break
      elif _gpu_below "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage
    while true; do
      mem_rate=$(_gpu_total_mem)
      if [ -z "$mem_rate" ]; then
        echo "cannot measure total memory usage, skipping this check" >&2
        break
      elif _gpu_below "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_flag_on "$constrain_mine"; then
    # Use a local name: assigning the (normally unset) username_mine to the
    # global username used to wipe the configured account.
    user=${username_mine:-${username:-$(id -un)}}

    # My share must stay below threshold / constrain_rate. Fall back to 2 when
    # the rate is not a positive number; the messages below speak of "half".
    rate=$constrain_rate
    if ! [[ $rate =~ ^[0-9]+([.][0-9]+)?$ ]] || ! _gpu_below 0 "$rate"; then
      rate=2
    fi
    read -r cpu_mean_max_mine memory_rate_max_mine < <(
      awk -v c="$cpu_mean_max" -v m="$memory_rate_max" -v r="$rate" \
        'BEGIN { printf "%.2f %.2f\n", c / r, m / r }'
    )

    while true; do
      cpu_usage_user_ratio=$(_gpu_user_cpu "$user")
      memory_usage_user_ratio=$(_gpu_user_mem "$user")
      if [ -z "$cpu_usage_user_ratio" ] || [ -z "$memory_usage_user_ratio" ]; then
        echo "cannot measure usage of user $user, skipping this check" >&2
        break
      elif _gpu_below "$cpu_usage_user_ratio" "$cpu_mean_max_mine" \
        && _gpu_below "$memory_usage_user_ratio" "$memory_rate_max_mine"; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  if ! command -v nvidia-smi > /dev/null; then
    echo "nvidia-smi not found, cannot pick a device" >&2
    return 1
  fi
  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "available_devices is empty, cannot pick a device" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    useage=${useage//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    if ! [[ $useage =~ ^[0-9]+$ && $utilization =~ ^[0-9]+$ ]]; then
      # Give up after one full pass without a usable reading instead of
      # spinning forever.
      unreadable=$((unreadable + 1))
      if [ "$unreadable" -ge "${#available_devices[@]}" ]; then
        echo "nvidia-smi returned no usable reading for any device" >&2
        return 1
      fi
      continue
    fi
    unreadable=0

    # The limit is total_gpu_memory - max_gpu_memory_gap. The undefined name
    # total_aviable used here before evaluated to 0, so every device looked
    # fully booked and this loop never ended.
    if [ "$useage" -ge $((total_gpu_memory - max_gpu_memory_gap)) ] \
      || [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h49m08s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Launches one single_user.py run per (poison_method, dataset_idx) pair.
# update_device_idx (from gpu_utility.sh) is called before every launch; it
# blocks until the machine has head-room and stores the chosen GPU in $device.

# Work from the directory holding this script so gpu_utility.sh and
# single_user.py are found; stop early if either step fails.
cd -- "$(dirname -- "$0")" || exit 1
source ./gpu_utility.sh || exit 1

##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    #####

    update_device_idx

    # Run on the GPU update_device_idx just picked; the exported value above is
    # only the fallback when no device was selected. The command is invoked
    # directly (no string + eval) so every argument stays one word.
    CUDA_VISIBLE_DEVICES="${device:-$CUDA_VISIBLE_DEVICES}" \
      python single_user.py \
        --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 \
        --batch_size 16 --num_epochs 2 --poison_method "$poison_method" \
        --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb &

    # Give the job time to claim its resources before probing again.
    sleep "${sleeptime:-10}"

    #####
  done
done

# Keep the script alive until every background run has finished.
wait
|
res/20230615-17h49m45s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# gpu_utility.sh -- sourced by script.sh. Defines the settings below and
# update_device_idx, which waits for CPU/RAM head-room and picks the next GPU.

username=yila22                  # account whose processes count as "mine"
max_gpu_utilization=90           # a GPU at or above this utilisation (%) is skipped
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used (MiB)
max_gpu_memory_gap=5000          # a GPU with less than this much memory left is skipped
available_devices=( 0 1 2 3 4 )  # GPU ids to rotate through
current_device_idx=-1            # index of the last GPU handed out; -1 = none yet
sleeptime=10                     # pause script.sh takes between launches (seconds)
cpu_mean_max=77                  # system-wide CPU usage limit (%)
memory_rate_max=80               # system-wide RAM usage limit (%)
constrain_total=True             # enable the system-wide CPU/RAM checks
constrain_mine=True              # enable the per-user CPU/RAM checks
constrain_rate=True              # divisor for the per-user limits; not a number as
                                 # generated, so update_device_idx falls back to 2

# Succeed when $1 is an enabled flag. The generated settings use "True", which
# a plain `[ $flag = true ]` comparison never matches.
_gpu_flag_on() {
  case "$1" in
    [Tt][Rr][Uu][Ee] | 1) return 0 ;;
  esac
  return 1
}

# Succeed when $1 is a number strictly below $2 (floating point comparison).
_gpu_below() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a ~ /^[0-9.]+$/ && a + 0 < b + 0) }'
}

# Print the system-wide CPU usage (%) averaged over three 1-second mpstat
# samples; prints nothing when no per-CPU "Average:" line could be parsed.
_gpu_total_cpu() {
  local i
  for i in 1 2 3; do
    mpstat -P ALL 1 1
    if [ "$i" -lt 3 ]; then sleep 1; fi
  done | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
              END { if (count) printf "%.2f\n", total / count }'
}

# Print the system-wide RAM usage (%) averaged over three `free -m` samples.
_gpu_total_mem() {
  local i
  for i in 1 2 3; do
    free -m
    if [ "$i" -lt 3 ]; then sleep 1; fi
  done | awk '/Mem:/ { used += $3; total = $2; n++ }
              END { if (n && total) printf "%.2f\n", used / n / total * 100 }'
}

# Print the summed %CPU of user $1 as a share (%) of all cores.
_gpu_user_cpu() {
  ps -u "$1" -o %cpu \
    | awk -v cores="$(nproc)" '{ sum += $1 }
        END { if (cores > 0) printf "%.2f\n", sum / cores }'
}

# Print the summed RSS of user $1 as a share (%) of total RAM.
_gpu_user_mem() {
  local total_mb
  total_mb=$(free -m | awk '/Mem:/ { print $2 }')
  ps -u "$1" -o rss \
    | awk -v total="$total_mb" '{ kb += $1 }
        END { if (total > 0) printf "%.2f\n", kb / 1024 / total * 100 }'
}

#######################################
# Block until the enabled CPU/RAM limits are met, then select the next GPU
# (round-robin over available_devices) that is not fully booked.
# Globals:
#   read:    constrain_total constrain_mine constrain_rate cpu_mean_max
#            memory_rate_max username username_mine (optional override)
#            available_devices total_gpu_memory max_gpu_memory_gap
#            max_gpu_utilization
#   written: current_device_idx, device
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 once a device is selected; 1 when nvidia-smi is missing, the
#            device list is empty, or no device gives a usable reading
#######################################
function update_device_idx {
  local cpu_mean mem_rate user rate
  local cpu_usage_user_ratio memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local useage utilization gpu unreadable=0

  if _gpu_flag_on "$constrain_total"; then
    # check total cpu usage
    while true; do
      cpu_mean=$(_gpu_total_cpu)
      if [ -z "$cpu_mean" ]; then
        # Without a reading the loop could never finish; skip instead.
        echo "cannot measure total cpu usage, skipping this check" >&2
        break
      elif _gpu_below "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage
    while true; do
      mem_rate=$(_gpu_total_mem)
      if [ -z "$mem_rate" ]; then
        echo "cannot measure total memory usage, skipping this check" >&2
        break
      elif _gpu_below "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_flag_on "$constrain_mine"; then
    # Use a local name: assigning the (normally unset) username_mine to the
    # global username used to wipe the configured account.
    user=${username_mine:-${username:-$(id -un)}}

    # My share must stay below threshold / constrain_rate. Fall back to 2 when
    # the rate is not a positive number; the messages below speak of "half".
    rate=$constrain_rate
    if ! [[ $rate =~ ^[0-9]+([.][0-9]+)?$ ]] || ! _gpu_below 0 "$rate"; then
      rate=2
    fi
    read -r cpu_mean_max_mine memory_rate_max_mine < <(
      awk -v c="$cpu_mean_max" -v m="$memory_rate_max" -v r="$rate" \
        'BEGIN { printf "%.2f %.2f\n", c / r, m / r }'
    )

    while true; do
      cpu_usage_user_ratio=$(_gpu_user_cpu "$user")
      memory_usage_user_ratio=$(_gpu_user_mem "$user")
      if [ -z "$cpu_usage_user_ratio" ] || [ -z "$memory_usage_user_ratio" ]; then
        echo "cannot measure usage of user $user, skipping this check" >&2
        break
      elif _gpu_below "$cpu_usage_user_ratio" "$cpu_mean_max_mine" \
        && _gpu_below "$memory_usage_user_ratio" "$memory_rate_max_mine"; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  if ! command -v nvidia-smi > /dev/null; then
    echo "nvidia-smi not found, cannot pick a device" >&2
    return 1
  fi
  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "available_devices is empty, cannot pick a device" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    useage=${useage//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    if ! [[ $useage =~ ^[0-9]+$ && $utilization =~ ^[0-9]+$ ]]; then
      # Give up after one full pass without a usable reading instead of
      # spinning forever.
      unreadable=$((unreadable + 1))
      if [ "$unreadable" -ge "${#available_devices[@]}" ]; then
        echo "nvidia-smi returned no usable reading for any device" >&2
        return 1
      fi
      continue
    fi
    unreadable=0

    # The limit is total_gpu_memory - max_gpu_memory_gap. The undefined name
    # total_aviable used here before evaluated to 0, so every device looked
    # fully booked and this loop never ended.
    if [ "$useage" -ge $((total_gpu_memory - max_gpu_memory_gap)) ] \
      || [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h49m45s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Run from the directory that holds this script so the relative names used
# below (gpu_utility.sh, single_user.py) resolve; stop if either step fails
# instead of launching jobs from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every run is pinned to GPU 2 here, while the `device` value
# set by update_device_idx is not used by the command below - confirm intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
    for dataset_idx in 0 1 2; do
        #####

        # Provided by the sourced gpu_utility.sh; returns once its checks pass.
        update_device_idx

        # Start one run in the background. The command is invoked directly
        # (no string + eval) so each argument is passed exactly as written.
        python single_user.py \
            --dataset_idx "$dataset_idx" \
            --trigger_size 1 \
            --target 0 \
            --loc 0 \
            --batch_size 16 \
            --num_epochs 2 \
            --poison_method "$poison_method" \
            --lr 5e-5 \
            --pattern 0 \
            --exp_name "$exp_name" \
            --log_wb &

        # Pause between launches; sleeptime comes from gpu_utility.sh.
        sleep "$sleeptime"

        #####

        #####
    done
done
|
res/20230615-17h50m13s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings shared by update_device_idx and the scripts that source this file.
username=yila22                  # account used by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU - presumably MiB, TODO confirm
max_gpu_memory_gap=5000          # memory headroom subtracted in the GPU memory test
available_devices=( 0 1 2 3 4 )  # GPU ids that may be handed out
current_device_idx=-1            # index into available_devices; -1 = nothing picked yet
sleeptime=10                     # seconds script.sh sleeps between two launches
cpu_mean_max=77                  # limit for the mean system CPU usage (%)
memory_rate_max=80               # limit for the system memory usage (%)
constrain_total=True             # switch for the system-wide CPU/memory check
constrain_mine=True              # switch for the per-user CPU/memory check
# Divisor applied to cpu_mean_max / memory_rate_max to obtain the per-user
# limits. It is passed to bc as a number, so it must be numeric; 2 means
# "my own usage has to stay below half of the limits".
constrain_rate=2
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
# Succeeds (status 0) when $1 spells "true" in any letter case, non-zero
# otherwise, so a flag set to "True" and one set to "true" behave the same.
function _gpu_utility_flag_on {
    case "$1" in
        [Tt][Rr][Uu][Ee]) return 0 ;;
        *) return 1 ;;
    esac
}

#######################################
# Wait until the enabled load checks pass, then advance to the next GPU in
# available_devices that is not fully booked.
# Globals read:
#   constrain_total, constrain_mine - "true" (any case) enables the check
#   constrain_rate                  - divisor giving the per-user limits
#   cpu_mean_max, memory_rate_max   - limits in percent
#   username_mine, username         - account inspected by the per-user check
#   total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
#   available_devices
# Globals written:
#   current_device_idx - index of the chosen entry in available_devices
#   device             - the chosen entry itself
# Outputs: progress messages on stdout, warnings on stderr.
# Returns: 0 once a device is chosen, 1 if available_devices is empty.
#######################################
function update_device_idx {
    local s1 s2 s3 cpu_mean mem_used mem_total mem_rate
    local user rate
    local user_cpu_sum cpu_capacity user_cpu_ratio
    local user_mem_sum user_mem_ratio
    local cpu_mean_max_mine memory_rate_max_mine
    local gpu_id gpu_mem_used gpu_util gpu_mem_limit
    local number_re='^[0-9]+([.][0-9]+)?$'
    local int_re='^[0-9]+$'
    # Mean busy percentage over the per-CPU "Average:" rows of mpstat
    # (busy = 100 - last column); prints nothing when no row matched.
    local cpu_awk='/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ } END { if (count) print total / count }'

    if [ "${#available_devices[@]}" -eq 0 ]; then
        echo "update_device_idx: available_devices is empty" >&2
        return 1
    fi

    if _gpu_utility_flag_on "$constrain_total"; then
        # check total cpu usage: mean of three samples taken one second apart
        while true; do
            s1=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            sleep 1
            s2=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            sleep 1
            s3=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            cpu_mean=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)

            # if currently cpu usage is less than the threshold, then break
            if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = "1" ]; then
                echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
                break
            else
                echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
                sleep 10
            fi
        done

        # check total memory usage: mean of three samples of free's "used" column
        while true; do
            s1=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            s2=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            s3=$(free -m | awk '/Mem:/ {print $3}')
            mem_used=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)

            # rate of memory usage in percent of free's "total" column
            mem_total=$(free -m | awk '/Mem:/ {print $2}')
            mem_rate=$(echo "scale=2; $mem_used/$mem_total*100" | bc)
            if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = "1" ]; then
                echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
                break
            else
                echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
                sleep 10
            fi
        done
    fi

    if _gpu_utility_flag_on "$constrain_mine"; then
        # Inspect username_mine when it is set, else the configured username;
        # the global username itself is left untouched.
        user=${username_mine:-$username}

        # bc needs a non-zero number as divisor; anything else falls back to 2.
        rate=$constrain_rate
        if ! [[ $rate =~ $number_re && $rate =~ [1-9] ]]; then
            echo "update_device_idx: constrain_rate '$rate' is not a positive number, using 2" >&2
            rate=2
        fi

        # my own usage has to stay below 1/rate of cpu_mean_max / memory_rate_max
        cpu_mean_max_mine=$(echo "$cpu_mean_max/$rate" | bc)
        memory_rate_max_mine=$(echo "$memory_rate_max/$rate" | bc)

        while true; do
            # summed %cpu of the user's processes, relative to nproc * 100
            user_cpu_sum=$(ps -u "$user" -o %cpu | awk '{sum+=$1} END {print sum+0}')
            cpu_capacity=$(( $(nproc) * 100 ))
            user_cpu_ratio=$(echo "scale=2; $user_cpu_sum/$cpu_capacity*100" | bc)

            # summed rss of the user's processes (divided by 1024), relative
            # to free's "total" column
            user_mem_sum=$(ps -u "$user" -o rss | awk '{sum+=$1} END {print sum/1024}')
            mem_total=$(free -m | awk '/Mem:/ {print $2}')
            user_mem_ratio=$(echo "scale=2; $user_mem_sum/$mem_total*100" | bc)

            if [ "$(echo "$user_cpu_ratio < $cpu_mean_max_mine" | bc)" = "1" ] &&
               [ "$(echo "$user_mem_ratio < $memory_rate_max_mine" | bc)" = "1" ]; then
                echo "my cpu usage: $user_cpu_ratio, memory usage: $user_mem_ratio is less than 1/$rate of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
                break
            else
                echo "my cpu usage: $user_cpu_ratio, memory usage: $user_mem_ratio is greater than 1/$rate of the given threshold, sleep 10 seconds"
                sleep 10
            fi
        done
    fi

    # all enabled conditions are satisfied: move on to the next device,
    # wrapping around, until one is neither memory-full nor busy
    gpu_mem_limit=$((total_gpu_memory - max_gpu_memory_gap))
    while true; do
        current_device_idx=$((current_device_idx + 1))
        if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
            # reset
            current_device_idx=0
        fi
        gpu_id=${available_devices[$current_device_idx]}

        gpu_mem_used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu_id")
        gpu_util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu_id")
        gpu_mem_used=${gpu_mem_used//[[:space:]]/}
        gpu_util=${gpu_util//[[:space:]]/}

        # A reading that is not a plain integer cannot be compared; skip the device.
        if ! [[ $gpu_mem_used =~ $int_re && $gpu_util =~ $int_re ]]; then
            echo "device $gpu_id: could not read usage from nvidia-smi, try next one" >&2
            sleep 3
            continue
        fi

        if [ "$gpu_mem_used" -ge "$gpu_mem_limit" ] || [ "$gpu_util" -ge "$max_gpu_utilization" ]; then
            echo "device $gpu_id is fully booked, try next one"
            sleep 3
            continue
        fi
        break
    done

    echo "current device: ${available_devices[$current_device_idx]}"
    device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h50m13s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Run from the directory that holds this script so the relative names used
# below (gpu_utility.sh, single_user.py) resolve; stop if either step fails
# instead of launching jobs from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every run is pinned to GPU 2 here, while the `device` value
# set by update_device_idx is not used by the command below - confirm intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
    for dataset_idx in 0 1 2; do
        #####

        # Provided by the sourced gpu_utility.sh; returns once its checks pass.
        update_device_idx

        # Start one run in the background. The command is invoked directly
        # (no string + eval) so each argument is passed exactly as written.
        python single_user.py \
            --dataset_idx "$dataset_idx" \
            --trigger_size 1 \
            --target 0 \
            --loc 0 \
            --batch_size 16 \
            --num_epochs 2 \
            --poison_method "$poison_method" \
            --lr 5e-5 \
            --pattern 0 \
            --exp_name "$exp_name" \
            --log_wb &

        # Pause between launches; sleeptime comes from gpu_utility.sh.
        sleep "$sleeptime"

        #####

        #####
    done
done
|
res/20230615-17h50m22s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings shared by update_device_idx and the scripts that source this file.
username=yila22                  # account used by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU - presumably MiB, TODO confirm
max_gpu_memory_gap=5000          # memory headroom subtracted in the GPU memory test
available_devices=( 0 1 2 3 4 )  # GPU ids that may be handed out
current_device_idx=-1            # index into available_devices; -1 = nothing picked yet
sleeptime=10                     # seconds script.sh sleeps between two launches
cpu_mean_max=77                  # limit for the mean system CPU usage (%)
memory_rate_max=80               # limit for the system memory usage (%)
constrain_total=True             # switch for the system-wide CPU/memory check
constrain_mine=True              # switch for the per-user CPU/memory check
# Divisor applied to cpu_mean_max / memory_rate_max to obtain the per-user
# limits. It is passed to bc as a number, so it must be numeric; 2 means
# "my own usage has to stay below half of the limits".
constrain_rate=2
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
# Succeeds (status 0) when $1 spells "true" in any letter case, non-zero
# otherwise, so a flag set to "True" and one set to "true" behave the same.
function _gpu_utility_flag_on {
    case "$1" in
        [Tt][Rr][Uu][Ee]) return 0 ;;
        *) return 1 ;;
    esac
}

#######################################
# Wait until the enabled load checks pass, then advance to the next GPU in
# available_devices that is not fully booked.
# Globals read:
#   constrain_total, constrain_mine - "true" (any case) enables the check
#   constrain_rate                  - divisor giving the per-user limits
#   cpu_mean_max, memory_rate_max   - limits in percent
#   username_mine, username         - account inspected by the per-user check
#   total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
#   available_devices
# Globals written:
#   current_device_idx - index of the chosen entry in available_devices
#   device             - the chosen entry itself
# Outputs: progress messages on stdout, warnings on stderr.
# Returns: 0 once a device is chosen, 1 if available_devices is empty.
#######################################
function update_device_idx {
    local s1 s2 s3 cpu_mean mem_used mem_total mem_rate
    local user rate
    local user_cpu_sum cpu_capacity user_cpu_ratio
    local user_mem_sum user_mem_ratio
    local cpu_mean_max_mine memory_rate_max_mine
    local gpu_id gpu_mem_used gpu_util gpu_mem_limit
    local number_re='^[0-9]+([.][0-9]+)?$'
    local int_re='^[0-9]+$'
    # Mean busy percentage over the per-CPU "Average:" rows of mpstat
    # (busy = 100 - last column); prints nothing when no row matched.
    local cpu_awk='/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ } END { if (count) print total / count }'

    if [ "${#available_devices[@]}" -eq 0 ]; then
        echo "update_device_idx: available_devices is empty" >&2
        return 1
    fi

    if _gpu_utility_flag_on "$constrain_total"; then
        # check total cpu usage: mean of three samples taken one second apart
        while true; do
            s1=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            sleep 1
            s2=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            sleep 1
            s3=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            cpu_mean=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)

            # if currently cpu usage is less than the threshold, then break
            if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = "1" ]; then
                echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
                break
            else
                echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
                sleep 10
            fi
        done

        # check total memory usage: mean of three samples of free's "used" column
        while true; do
            s1=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            s2=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            s3=$(free -m | awk '/Mem:/ {print $3}')
            mem_used=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)

            # rate of memory usage in percent of free's "total" column
            mem_total=$(free -m | awk '/Mem:/ {print $2}')
            mem_rate=$(echo "scale=2; $mem_used/$mem_total*100" | bc)
            if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = "1" ]; then
                echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
                break
            else
                echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
                sleep 10
            fi
        done
    fi

    if _gpu_utility_flag_on "$constrain_mine"; then
        # Inspect username_mine when it is set, else the configured username;
        # the global username itself is left untouched.
        user=${username_mine:-$username}

        # bc needs a non-zero number as divisor; anything else falls back to 2.
        rate=$constrain_rate
        if ! [[ $rate =~ $number_re && $rate =~ [1-9] ]]; then
            echo "update_device_idx: constrain_rate '$rate' is not a positive number, using 2" >&2
            rate=2
        fi

        # my own usage has to stay below 1/rate of cpu_mean_max / memory_rate_max
        cpu_mean_max_mine=$(echo "$cpu_mean_max/$rate" | bc)
        memory_rate_max_mine=$(echo "$memory_rate_max/$rate" | bc)

        while true; do
            # summed %cpu of the user's processes, relative to nproc * 100
            user_cpu_sum=$(ps -u "$user" -o %cpu | awk '{sum+=$1} END {print sum+0}')
            cpu_capacity=$(( $(nproc) * 100 ))
            user_cpu_ratio=$(echo "scale=2; $user_cpu_sum/$cpu_capacity*100" | bc)

            # summed rss of the user's processes (divided by 1024), relative
            # to free's "total" column
            user_mem_sum=$(ps -u "$user" -o rss | awk '{sum+=$1} END {print sum/1024}')
            mem_total=$(free -m | awk '/Mem:/ {print $2}')
            user_mem_ratio=$(echo "scale=2; $user_mem_sum/$mem_total*100" | bc)

            if [ "$(echo "$user_cpu_ratio < $cpu_mean_max_mine" | bc)" = "1" ] &&
               [ "$(echo "$user_mem_ratio < $memory_rate_max_mine" | bc)" = "1" ]; then
                echo "my cpu usage: $user_cpu_ratio, memory usage: $user_mem_ratio is less than 1/$rate of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
                break
            else
                echo "my cpu usage: $user_cpu_ratio, memory usage: $user_mem_ratio is greater than 1/$rate of the given threshold, sleep 10 seconds"
                sleep 10
            fi
        done
    fi

    # all enabled conditions are satisfied: move on to the next device,
    # wrapping around, until one is neither memory-full nor busy
    gpu_mem_limit=$((total_gpu_memory - max_gpu_memory_gap))
    while true; do
        current_device_idx=$((current_device_idx + 1))
        if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
            # reset
            current_device_idx=0
        fi
        gpu_id=${available_devices[$current_device_idx]}

        gpu_mem_used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu_id")
        gpu_util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu_id")
        gpu_mem_used=${gpu_mem_used//[[:space:]]/}
        gpu_util=${gpu_util//[[:space:]]/}

        # A reading that is not a plain integer cannot be compared; skip the device.
        if ! [[ $gpu_mem_used =~ $int_re && $gpu_util =~ $int_re ]]; then
            echo "device $gpu_id: could not read usage from nvidia-smi, try next one" >&2
            sleep 3
            continue
        fi

        if [ "$gpu_mem_used" -ge "$gpu_mem_limit" ] || [ "$gpu_util" -ge "$max_gpu_utilization" ]; then
            echo "device $gpu_id is fully booked, try next one"
            sleep 3
            continue
        fi
        break
    done

    echo "current device: ${available_devices[$current_device_idx]}"
    device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h50m22s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Run from the directory that holds this script so the relative names used
# below (gpu_utility.sh, single_user.py) resolve; stop if either step fails
# instead of launching jobs from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every run is pinned to GPU 2 here, while the `device` value
# set by update_device_idx is not used by the command below - confirm intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
    for dataset_idx in 0 1 2; do
        #####

        # Provided by the sourced gpu_utility.sh; returns once its checks pass.
        update_device_idx

        # Start one run in the background. The command is invoked directly
        # (no string + eval) so each argument is passed exactly as written.
        python single_user.py \
            --dataset_idx "$dataset_idx" \
            --trigger_size 1 \
            --target 0 \
            --loc 0 \
            --batch_size 16 \
            --num_epochs 2 \
            --poison_method "$poison_method" \
            --lr 5e-5 \
            --pattern 0 \
            --exp_name "$exp_name" \
            --log_wb &

        # Pause between launches; sleeptime comes from gpu_utility.sh.
        sleep "$sleeptime"

        #####

        #####
    done
done
|
res/20230615-17h50m57s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings shared by update_device_idx and the scripts that source this file.
username=yila22                  # account used by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU - presumably MiB, TODO confirm
max_gpu_memory_gap=5000          # memory headroom subtracted in the GPU memory test
available_devices=( 0 1 2 3 4 )  # GPU ids that may be handed out
current_device_idx=-1            # index into available_devices; -1 = nothing picked yet
sleeptime=10                     # seconds script.sh sleeps between two launches
cpu_mean_max=77                  # limit for the mean system CPU usage (%)
memory_rate_max=80               # limit for the system memory usage (%)
constrain_total=True             # switch for the system-wide CPU/memory check
constrain_mine=True              # switch for the per-user CPU/memory check
# Divisor applied to cpu_mean_max / memory_rate_max to obtain the per-user
# limits. It is passed to bc as a number, so it must be numeric; 2 means
# "my own usage has to stay below half of the limits".
constrain_rate=2
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
# Succeeds (status 0) when $1 spells "true" in any letter case, non-zero
# otherwise, so a flag set to "True" and one set to "true" behave the same.
function _gpu_utility_flag_on {
    case "$1" in
        [Tt][Rr][Uu][Ee]) return 0 ;;
        *) return 1 ;;
    esac
}

#######################################
# Wait until the enabled load checks pass, then advance to the next GPU in
# available_devices that is not fully booked.
# Globals read:
#   constrain_total, constrain_mine - "true" (any case) enables the check
#   constrain_rate                  - divisor giving the per-user limits
#   cpu_mean_max, memory_rate_max   - limits in percent
#   username_mine, username         - account inspected by the per-user check
#   total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
#   available_devices
# Globals written:
#   current_device_idx - index of the chosen entry in available_devices
#   device             - the chosen entry itself
# Outputs: progress messages on stdout, warnings on stderr.
# Returns: 0 once a device is chosen, 1 if available_devices is empty.
#######################################
function update_device_idx {
    local s1 s2 s3 cpu_mean mem_used mem_total mem_rate
    local user rate
    local user_cpu_sum cpu_capacity user_cpu_ratio
    local user_mem_sum user_mem_ratio
    local cpu_mean_max_mine memory_rate_max_mine
    local gpu_id gpu_mem_used gpu_util gpu_mem_limit
    local number_re='^[0-9]+([.][0-9]+)?$'
    local int_re='^[0-9]+$'
    # Mean busy percentage over the per-CPU "Average:" rows of mpstat
    # (busy = 100 - last column); prints nothing when no row matched.
    local cpu_awk='/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ } END { if (count) print total / count }'

    if [ "${#available_devices[@]}" -eq 0 ]; then
        echo "update_device_idx: available_devices is empty" >&2
        return 1
    fi

    if _gpu_utility_flag_on "$constrain_total"; then
        # check total cpu usage: mean of three samples taken one second apart
        while true; do
            s1=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            sleep 1
            s2=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            sleep 1
            s3=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            cpu_mean=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)

            # if currently cpu usage is less than the threshold, then break
            if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = "1" ]; then
                echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
                break
            else
                echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
                sleep 10
            fi
        done

        # check total memory usage: mean of three samples of free's "used" column
        while true; do
            s1=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            s2=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            s3=$(free -m | awk '/Mem:/ {print $3}')
            mem_used=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)

            # rate of memory usage in percent of free's "total" column
            mem_total=$(free -m | awk '/Mem:/ {print $2}')
            mem_rate=$(echo "scale=2; $mem_used/$mem_total*100" | bc)
            if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = "1" ]; then
                echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
                break
            else
                echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
                sleep 10
            fi
        done
    fi

    if _gpu_utility_flag_on "$constrain_mine"; then
        # Inspect username_mine when it is set, else the configured username;
        # the global username itself is left untouched.
        user=${username_mine:-$username}

        # bc needs a non-zero number as divisor; anything else falls back to 2.
        rate=$constrain_rate
        if ! [[ $rate =~ $number_re && $rate =~ [1-9] ]]; then
            echo "update_device_idx: constrain_rate '$rate' is not a positive number, using 2" >&2
            rate=2
        fi

        # my own usage has to stay below 1/rate of cpu_mean_max / memory_rate_max
        cpu_mean_max_mine=$(echo "$cpu_mean_max/$rate" | bc)
        memory_rate_max_mine=$(echo "$memory_rate_max/$rate" | bc)

        while true; do
            # summed %cpu of the user's processes, relative to nproc * 100
            user_cpu_sum=$(ps -u "$user" -o %cpu | awk '{sum+=$1} END {print sum+0}')
            cpu_capacity=$(( $(nproc) * 100 ))
            user_cpu_ratio=$(echo "scale=2; $user_cpu_sum/$cpu_capacity*100" | bc)

            # summed rss of the user's processes (divided by 1024), relative
            # to free's "total" column
            user_mem_sum=$(ps -u "$user" -o rss | awk '{sum+=$1} END {print sum/1024}')
            mem_total=$(free -m | awk '/Mem:/ {print $2}')
            user_mem_ratio=$(echo "scale=2; $user_mem_sum/$mem_total*100" | bc)

            if [ "$(echo "$user_cpu_ratio < $cpu_mean_max_mine" | bc)" = "1" ] &&
               [ "$(echo "$user_mem_ratio < $memory_rate_max_mine" | bc)" = "1" ]; then
                echo "my cpu usage: $user_cpu_ratio, memory usage: $user_mem_ratio is less than 1/$rate of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
                break
            else
                echo "my cpu usage: $user_cpu_ratio, memory usage: $user_mem_ratio is greater than 1/$rate of the given threshold, sleep 10 seconds"
                sleep 10
            fi
        done
    fi

    # all enabled conditions are satisfied: move on to the next device,
    # wrapping around, until one is neither memory-full nor busy
    gpu_mem_limit=$((total_gpu_memory - max_gpu_memory_gap))
    while true; do
        current_device_idx=$((current_device_idx + 1))
        if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
            # reset
            current_device_idx=0
        fi
        gpu_id=${available_devices[$current_device_idx]}

        gpu_mem_used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu_id")
        gpu_util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu_id")
        gpu_mem_used=${gpu_mem_used//[[:space:]]/}
        gpu_util=${gpu_util//[[:space:]]/}

        # A reading that is not a plain integer cannot be compared; skip the device.
        if ! [[ $gpu_mem_used =~ $int_re && $gpu_util =~ $int_re ]]; then
            echo "device $gpu_id: could not read usage from nvidia-smi, try next one" >&2
            sleep 3
            continue
        fi

        if [ "$gpu_mem_used" -ge "$gpu_mem_limit" ] || [ "$gpu_util" -ge "$max_gpu_utilization" ]; then
            echo "device $gpu_id is fully booked, try next one"
            sleep 3
            continue
        fi
        break
    done

    echo "current device: ${available_devices[$current_device_idx]}"
    device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h50m57s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Run from the directory that holds this script so the relative names used
# below (gpu_utility.sh, single_user.py) resolve; stop if either step fails
# instead of launching jobs from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every run is pinned to GPU 2 here, while the `device` value
# set by update_device_idx is not used by the command below - confirm intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
    for dataset_idx in 0 1 2; do
        #####

        # Provided by the sourced gpu_utility.sh; returns once its checks pass.
        update_device_idx

        # Start one run in the background. The command is invoked directly
        # (no string + eval) so each argument is passed exactly as written.
        python single_user.py \
            --dataset_idx "$dataset_idx" \
            --trigger_size 1 \
            --target 0 \
            --loc 0 \
            --batch_size 16 \
            --num_epochs 2 \
            --poison_method "$poison_method" \
            --lr 5e-5 \
            --pattern 0 \
            --exp_name "$exp_name" \
            --log_wb &

        # Pause between launches; sleeptime comes from gpu_utility.sh.
        sleep "$sleeptime"

        #####

        #####
    done
done
|
res/20230615-17h51m33s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings shared by update_device_idx and the scripts that source this file.
username=yila22                  # account used by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU - presumably MiB, TODO confirm
max_gpu_memory_gap=5000          # memory headroom subtracted in the GPU memory test
available_devices=( 0 1 2 3 4 )  # GPU ids that may be handed out
current_device_idx=-1            # index into available_devices; -1 = nothing picked yet
sleeptime=10                     # seconds script.sh sleeps between two launches
cpu_mean_max=77                  # limit for the mean system CPU usage (%)
memory_rate_max=80               # limit for the system memory usage (%)
constrain_total=True             # switch for the system-wide CPU/memory check
constrain_mine=True              # switch for the per-user CPU/memory check
# Divisor applied to cpu_mean_max / memory_rate_max to obtain the per-user
# limits. It is passed to bc as a number, so it must be numeric; 2 means
# "my own usage has to stay below half of the limits".
constrain_rate=2
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
# Succeeds (status 0) when $1 spells "true" in any letter case, non-zero
# otherwise, so a flag set to "True" and one set to "true" behave the same.
function _gpu_utility_flag_on {
    case "$1" in
        [Tt][Rr][Uu][Ee]) return 0 ;;
        *) return 1 ;;
    esac
}

#######################################
# Wait until the enabled load checks pass, then advance to the next GPU in
# available_devices that is not fully booked.
# Globals read:
#   constrain_total, constrain_mine - "true" (any case) enables the check
#   constrain_rate                  - divisor giving the per-user limits
#   cpu_mean_max, memory_rate_max   - limits in percent
#   username_mine, username         - account inspected by the per-user check
#   total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
#   available_devices
# Globals written:
#   current_device_idx - index of the chosen entry in available_devices
#   device             - the chosen entry itself
# Outputs: progress messages on stdout, warnings on stderr.
# Returns: 0 once a device is chosen, 1 if available_devices is empty.
#######################################
function update_device_idx {
    local s1 s2 s3 cpu_mean mem_used mem_total mem_rate
    local user rate
    local user_cpu_sum cpu_capacity user_cpu_ratio
    local user_mem_sum user_mem_ratio
    local cpu_mean_max_mine memory_rate_max_mine
    local gpu_id gpu_mem_used gpu_util gpu_mem_limit
    local number_re='^[0-9]+([.][0-9]+)?$'
    local int_re='^[0-9]+$'
    # Mean busy percentage over the per-CPU "Average:" rows of mpstat
    # (busy = 100 - last column); prints nothing when no row matched.
    local cpu_awk='/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ } END { if (count) print total / count }'

    if [ "${#available_devices[@]}" -eq 0 ]; then
        echo "update_device_idx: available_devices is empty" >&2
        return 1
    fi

    if _gpu_utility_flag_on "$constrain_total"; then
        # check total cpu usage: mean of three samples taken one second apart
        while true; do
            s1=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            sleep 1
            s2=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            sleep 1
            s3=$(mpstat -P ALL 1 1 | awk "$cpu_awk")
            cpu_mean=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)

            # if currently cpu usage is less than the threshold, then break
            if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = "1" ]; then
                echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
                break
            else
                echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
                sleep 10
            fi
        done

        # check total memory usage: mean of three samples of free's "used" column
        while true; do
            s1=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            s2=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            s3=$(free -m | awk '/Mem:/ {print $3}')
            mem_used=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)

            # rate of memory usage in percent of free's "total" column
            mem_total=$(free -m | awk '/Mem:/ {print $2}')
            mem_rate=$(echo "scale=2; $mem_used/$mem_total*100" | bc)
            if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = "1" ]; then
                echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
                break
            else
                echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
                sleep 10
            fi
        done
    fi

    if _gpu_utility_flag_on "$constrain_mine"; then
        # Inspect username_mine when it is set, else the configured username;
        # the global username itself is left untouched.
        user=${username_mine:-$username}

        # bc needs a non-zero number as divisor; anything else falls back to 2.
        rate=$constrain_rate
        if ! [[ $rate =~ $number_re && $rate =~ [1-9] ]]; then
            echo "update_device_idx: constrain_rate '$rate' is not a positive number, using 2" >&2
            rate=2
        fi

        # my own usage has to stay below 1/rate of cpu_mean_max / memory_rate_max
        cpu_mean_max_mine=$(echo "$cpu_mean_max/$rate" | bc)
        memory_rate_max_mine=$(echo "$memory_rate_max/$rate" | bc)

        while true; do
            # summed %cpu of the user's processes, relative to nproc * 100
            user_cpu_sum=$(ps -u "$user" -o %cpu | awk '{sum+=$1} END {print sum+0}')
            cpu_capacity=$(( $(nproc) * 100 ))
            user_cpu_ratio=$(echo "scale=2; $user_cpu_sum/$cpu_capacity*100" | bc)

            # summed rss of the user's processes (divided by 1024), relative
            # to free's "total" column
            user_mem_sum=$(ps -u "$user" -o rss | awk '{sum+=$1} END {print sum/1024}')
            mem_total=$(free -m | awk '/Mem:/ {print $2}')
            user_mem_ratio=$(echo "scale=2; $user_mem_sum/$mem_total*100" | bc)

            if [ "$(echo "$user_cpu_ratio < $cpu_mean_max_mine" | bc)" = "1" ] &&
               [ "$(echo "$user_mem_ratio < $memory_rate_max_mine" | bc)" = "1" ]; then
                echo "my cpu usage: $user_cpu_ratio, memory usage: $user_mem_ratio is less than 1/$rate of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
                break
            else
                echo "my cpu usage: $user_cpu_ratio, memory usage: $user_mem_ratio is greater than 1/$rate of the given threshold, sleep 10 seconds"
                sleep 10
            fi
        done
    fi

    # all enabled conditions are satisfied: move on to the next device,
    # wrapping around, until one is neither memory-full nor busy
    gpu_mem_limit=$((total_gpu_memory - max_gpu_memory_gap))
    while true; do
        current_device_idx=$((current_device_idx + 1))
        if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
            # reset
            current_device_idx=0
        fi
        gpu_id=${available_devices[$current_device_idx]}

        gpu_mem_used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu_id")
        gpu_util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu_id")
        gpu_mem_used=${gpu_mem_used//[[:space:]]/}
        gpu_util=${gpu_util//[[:space:]]/}

        # A reading that is not a plain integer cannot be compared; skip the device.
        if ! [[ $gpu_mem_used =~ $int_re && $gpu_util =~ $int_re ]]; then
            echo "device $gpu_id: could not read usage from nvidia-smi, try next one" >&2
            sleep 3
            continue
        fi

        if [ "$gpu_mem_used" -ge "$gpu_mem_limit" ] || [ "$gpu_util" -ge "$max_gpu_utilization" ]; then
            echo "device $gpu_id is fully booked, try next one"
            sleep 3
            continue
        fi
        break
    done

    echo "current device: ${available_devices[$current_device_idx]}"
    device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h51m33s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Run from the directory that holds this script so the relative names used
# below (gpu_utility.sh, single_user.py) resolve; stop if either step fails
# instead of launching jobs from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every run is pinned to GPU 2 here, while the `device` value
# set by update_device_idx is not used by the command below - confirm intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
    for dataset_idx in 0 1 2; do
        #####

        # Provided by the sourced gpu_utility.sh; returns once its checks pass.
        update_device_idx

        # Start one run in the background. The command is invoked directly
        # (no string + eval) so each argument is passed exactly as written.
        python single_user.py \
            --dataset_idx "$dataset_idx" \
            --trigger_size 1 \
            --target 0 \
            --loc 0 \
            --batch_size 16 \
            --num_epochs 2 \
            --poison_method "$poison_method" \
            --lr 5e-5 \
            --pattern 0 \
            --exp_name "$exp_name" \
            --log_wb &

        # Pause between launches; sleeptime comes from gpu_utility.sh.
        sleep "$sleeptime"

        #####

        #####
    done
done
|
res/20230615-17h51m43s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings shared by update_device_idx and the scripts that source this file.
username=yila22                  # account used by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU - presumably MiB, TODO confirm
max_gpu_memory_gap=5000          # memory headroom subtracted in the GPU memory test
available_devices=( 0 1 2 3 4 )  # GPU ids that may be handed out
current_device_idx=-1            # index into available_devices; -1 = nothing picked yet
sleeptime=10                     # seconds script.sh sleeps between two launches
cpu_mean_max=77                  # limit for the mean system CPU usage (%)
memory_rate_max=80               # limit for the system memory usage (%)
constrain_total=True             # switch for the system-wide CPU/memory check
constrain_mine=True              # switch for the per-user CPU/memory check
# Divisor applied to cpu_mean_max / memory_rate_max to obtain the per-user
# limits. It is passed to bc as a number, so it must be numeric; 2 means
# "my own usage has to stay below half of the limits".
constrain_rate=2
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
function update_device_idx {
|
18 |
+
if [ $constrain_total = true ]; then
|
19 |
+
# check total cpu usage
|
20 |
+
while true; do
|
21 |
+
cpu_mean_1=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
22 |
+
sleep 1
|
23 |
+
cpu_mean_2=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
24 |
+
sleep 1
|
25 |
+
cpu_mean_3=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
26 |
+
cpu_mean=$(echo "scale=2; ($cpu_mean_1+$cpu_mean_2+$cpu_mean_3)/3" | bc)
|
27 |
+
|
28 |
+
# if currently cpu usage is less than the threshold, then break
|
29 |
+
if [ $(echo "$cpu_mean < $cpu_mean_max" | bc) -eq 1 ]; then
|
30 |
+
echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
|
31 |
+
break
|
32 |
+
else
|
33 |
+
echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
|
34 |
+
sleep 10
|
35 |
+
fi
|
36 |
+
done;
|
37 |
+
|
38 |
+
# check total memory usage
|
39 |
+
while true; do
|
40 |
+
# get memory usage of whole system
|
41 |
+
mem_used_1=$(free -m | awk '/Mem:/ {print $3}')
|
42 |
+
sleep 1
|
43 |
+
mem_used_2=$(free -m | awk '/Mem:/ {print $3}')
|
44 |
+
sleep 1
|
45 |
+
mem_used_3=$(free -m | awk '/Mem:/ {print $3}')
|
46 |
+
mem_used=$(echo "scale=2; ($mem_used_1+$mem_used_2+$mem_used_3)/3" | bc)
|
47 |
+
|
48 |
+
# echo $mem_used
|
49 |
+
# get rate of memory usage
|
50 |
+
mem_rate=$(echo "scale=2; $mem_used/$(free -m | awk '/Mem:/ {print $2}')*100" | bc)
|
51 |
+
# echo $mem_rate
|
52 |
+
if [ $(echo "$mem_rate < $memory_rate_max" | bc) -eq 1 ]; then
|
53 |
+
echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
|
54 |
+
break
|
55 |
+
else
|
56 |
+
echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
|
57 |
+
sleep 10
|
58 |
+
fi
|
59 |
+
done;
|
60 |
+
fi;
|
61 |
+
|
62 |
+
# if constrain_mine
|
63 |
+
if [ $constrain_mine = true ]; then
|
64 |
+
|
65 |
+
# check my own cpu and memory usage, it should be less than 1/$constrain_rate of the given cpu_mean_max / memory_rate_max
|
66 |
+
while true; do
|
67 |
+
username=$username_mine
|
68 |
+
cpu_usage_user_sum=$(ps -u $username -o %cpu | awk '{sum+=$1} END {print sum}')
|
69 |
+
# echo $cpu_usage_user_sum
|
70 |
+
total_aviable_cpu=$(nproc)
|
71 |
+
total_aviable_cpu=$(echo "$total_aviable_cpu*100" | bc)
|
72 |
+
# echo $total_aviable_cpu
|
73 |
+
cpu_usage_user_ratio=$(echo "scale=2; $cpu_usage_user_sum/$total_aviable_cpu*100" | bc)
|
74 |
+
# echo $cpu_usage_user_ratio
|
75 |
+
|
76 |
+
memory_usage_user_sum=$(ps -u $username -o rss | awk '{sum+=$1} END {print sum/1024}')
|
77 |
+
# echo $memory_usage_user_sum
|
78 |
+
memory_usage_total=$(free -m | awk '/Mem:/ {print $2}')
|
79 |
+
# echo $memory_usage_total
|
80 |
+
memory_usage_user_ratio=$(echo "scale=2; $memory_usage_user_sum/$memory_usage_total*100" | bc)
|
81 |
+
# echo $memory_usage_user_ratio
|
82 |
+
|
83 |
+
# so my ratio should be less than 1/$constrain_rate of the given threshold
|
84 |
+
cpu_mean_max_mine=$(echo "$cpu_mean_max/$constrain_rate" | bc)
|
85 |
+
memory_rate_max_mine=$(echo "$memory_rate_max/$constrain_rate" | bc)
|
86 |
+
if [ $(echo "$cpu_usage_user_ratio < $cpu_mean_max_mine" | bc) -eq 1 ] && [ $(echo "$memory_usage_user_ratio < $memory_rate_max_mine" | bc) -eq 1 ]; then
|
87 |
+
echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
|
88 |
+
break
|
89 |
+
else
|
90 |
+
echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
|
91 |
+
sleep 10
|
92 |
+
fi
|
93 |
+
done;
|
94 |
+
fi;
|
95 |
+
|
96 |
+
# so all the conditions are satisfied, we can update the device idx and run the next experiment
|
97 |
+
while true; do
|
98 |
+
current_device_idx=$((current_device_idx+1))
|
99 |
+
if [ $current_device_idx -ge ${#available_devices[@]} ]; then
|
100 |
+
# reset
|
101 |
+
current_device_idx=0
|
102 |
+
fi
|
103 |
+
# check whether this device is fully booked using nvidia-smi
|
104 |
+
# get the gpu current memory usage
|
105 |
+
useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
|
106 |
+
utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
|
107 |
+
|
108 |
+
if [ $useage -ge $((total_aviable-max_gpu_memory_gap)) ] || [ $utilization -ge $max_gpu_utilization ]; then
|
109 |
+
echo "device ${available_devices[$current_device_idx]} is fully booked, try next one"
|
110 |
+
sleep 3
|
111 |
+
continue
|
112 |
+
else
|
113 |
+
break
|
114 |
+
fi
|
115 |
+
done
|
116 |
+
echo "current device: ${available_devices[$current_device_idx]}"
|
117 |
+
device=${available_devices[$current_device_idx]}
|
118 |
+
}
|
res/20230615-17h51m43s/script.sh
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Runs single_user.py once for every poison method / dataset combination.
# Before each launch, update_device_idx (defined in gpu_utility.sh) is called;
# each run is started in the background and the loop then pauses for
# $sleeptime seconds (also defined in gpu_utility.sh).

# Work from the directory that contains this script; stop if that fails, since
# gpu_utility.sh and single_user.py are addressed by relative path.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): the GPU is pinned here and the `device` variable that
# update_device_idx sets is not used by this script -- confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

  update_device_idx

  # The arguments live in an array so the job can be started directly; no
  # string-built command line and no eval are needed.
  run_args=(
    --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0
    --batch_size 16 --num_epochs 2 --poison_method "$poison_method"
    --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb
  )
  python single_user.py "${run_args[@]}" &

  sleep "$sleeptime"

#####

#####
done;done;
|
res/20230615-17h53m28s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings read by update_device_idx. This file is meant to be sourced.
username=yila22                  # account whose processes the per-user gate sums up
max_gpu_utilization=90           # a GPU at or above this utilization.gpu value is skipped
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000          # a GPU is skipped once used >= total_gpu_memory - gap
available_devices=( 0 1 2 3 4 )  # GPU ids tried in round-robin order
current_device_idx=-1            # index of the device handed out last; -1 = none yet
sleeptime=10                     # pause between launches, read by script.sh
cpu_mean_max=77                  # ceiling for the system-wide CPU busy percentage
memory_rate_max=80               # ceiling for the system-wide memory usage percentage
constrain_total=True             # enable the system-wide CPU/memory gate
constrain_mine=True              # enable the per-user CPU/memory gate
constrain_rate=True              # divisor for the per-user limits; non-numeric falls back to 2

# Succeeds when $1 is an enabled flag. Accepts shell-style "true" as well as
# the Python-style "True" used by the settings above.
_gpu_flag_on() {
  case "$1" in
    true|True|TRUE|1) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when $1 < $2, compared as decimals by bc; fails on an empty operand.
_gpu_lt() {
  [ -n "$1" ] && [ -n "$2" ] && [ "$(echo "$1 < $2" | bc)" = "1" ]
}

# Prints the mean of three readings of the command "$@", taken 1 s apart.
_gpu_mean_of_3() {
  local a b c
  a=$("$@"); sleep 1
  b=$("$@"); sleep 1
  c=$("$@")
  echo "scale=2; ($a+$b+$c)/3" | bc
}

# Prints the busy percentage (100 - last column) averaged over the per-CPU
# "Average:" rows of one mpstat run; prints nothing if no such row is found.
_gpu_cpu_busy() {
  mpstat -P ALL 1 1 \
    | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
           END { if (count) print total / count }'
}

# Prints column $1 of the "Mem:" row of `free -m` (2 = total, 3 = used).
_gpu_mem_field() {
  free -m | awk -v col="$1" '/Mem:/ { print $col }'
}

#######################################
# Blocks until the enabled resource gates pass, then selects the next GPU
# from available_devices (round-robin) that is not fully booked.
# Globals:
#   read:    constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#            memory_rate_max, username, username_mine (optional override),
#            available_devices, total_gpu_memory, max_gpu_memory_gap,
#            max_gpu_utilization
#   written: current_device_idx, device
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 once a device is selected, 1 if available_devices is empty
#######################################
update_device_idx() {
  local cpu_mean mem_used mem_total mem_rate
  local user rate cpu_limit mem_limit
  local cpu_sum cpu_capacity cpu_ratio mem_sum mem_ratio
  local dev used utilization

  if _gpu_flag_on "$constrain_total"; then
    # check total cpu usage
    while true; do
      cpu_mean=$(_gpu_mean_of_3 _gpu_cpu_busy)
      if _gpu_lt "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage
    while true; do
      mem_used=$(_gpu_mean_of_3 _gpu_mem_field 3)
      mem_total=$(_gpu_mem_field 2)
      # Multiply before dividing: with scale=2 the reverse order would
      # truncate the result to whole percent.
      mem_rate=$(echo "scale=2; $mem_used*100/$mem_total" | bc)
      if _gpu_lt "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_flag_on "$constrain_mine"; then
    # username_mine is an optional override; otherwise use the configured user.
    user=${username_mine:-$username}

    # The per-user limits are the global ceilings divided by constrain_rate.
    # A non-numeric or non-positive value cannot be a divisor; 2 matches the
    # "half of the given threshold" wording of the messages below.
    rate=$constrain_rate
    if ! [[ "$rate" =~ ^[0-9]+([.][0-9]+)?$ ]] || ! _gpu_lt 0 "$rate"; then
      echo "constrain_rate='$constrain_rate' is not a positive number, using 2" >&2
      rate=2
    fi
    cpu_limit=$(echo "$cpu_mean_max/$rate" | bc)
    mem_limit=$(echo "$memory_rate_max/$rate" | bc)

    while true; do
      # Sum of %CPU over the user's processes, relative to nproc * 100.
      cpu_sum=$(ps -u "$user" -o %cpu | awk '{ sum += $1 } END { print sum + 0 }')
      cpu_capacity=$(( $(nproc) * 100 ))
      cpu_ratio=$(echo "scale=2; $cpu_sum*100/$cpu_capacity" | bc)

      # Sum of RSS (KiB per ps) converted to MiB; printf avoids the exponent
      # notation awk's default output format uses for large numbers.
      mem_sum=$(ps -u "$user" -o rss | awk '{ sum += $1 } END { printf "%.2f\n", sum / 1024 }')
      mem_total=$(_gpu_mem_field 2)
      mem_ratio=$(echo "scale=2; $mem_sum*100/$mem_total" | bc)

      if _gpu_lt "$cpu_ratio" "$cpu_limit" && _gpu_lt "$mem_ratio" "$mem_limit"; then
        echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "available_devices is empty, no device can be selected" >&2
    return 1
  fi

  # All gates passed: advance round-robin until a device with headroom is found.
  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      current_device_idx=0
    fi
    dev=${available_devices[$current_device_idx]}

    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$dev")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$dev")
    used=${used//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    # A failed or non-numeric query must not count as a free device.
    if ! [[ "$used" =~ ^[0-9]+$ && "$utilization" =~ ^[0-9]+$ ]]; then
      echo "could not query device $dev with nvidia-smi, try next one" >&2
      sleep 3
      continue
    fi

    if (( used >= total_gpu_memory - max_gpu_memory_gap || utilization >= max_gpu_utilization )); then
      echo "device $dev is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $dev"
  device=$dev
}
|
res/20230615-17h53m28s/script.sh
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Runs single_user.py once for every poison method / dataset combination.
# Before each run, update_device_idx (defined in gpu_utility.sh) is called;
# the run executes in the foreground and the loop then pauses for $sleeptime
# seconds (also defined in gpu_utility.sh).

# Work from the directory that contains this script; stop if that fails, since
# gpu_utility.sh and single_user.py are addressed by relative path.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): the GPU is pinned here and the `device` variable that
# update_device_idx sets is not used by this script -- confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

  update_device_idx

  python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb

  sleep "$sleeptime"

#####

#####
done;done;
|
res/20230615-17h53m44s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings read by update_device_idx. This file is meant to be sourced.
username=yila22                  # account whose processes the per-user gate sums up
max_gpu_utilization=90           # a GPU at or above this utilization.gpu value is skipped
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000          # a GPU is skipped once used >= total_gpu_memory - gap
available_devices=( 0 1 2 3 4 )  # GPU ids tried in round-robin order
current_device_idx=-1            # index of the device handed out last; -1 = none yet
sleeptime=10                     # pause between launches, read by script.sh
cpu_mean_max=77                  # ceiling for the system-wide CPU busy percentage
memory_rate_max=80               # ceiling for the system-wide memory usage percentage
constrain_total=True             # enable the system-wide CPU/memory gate
constrain_mine=True              # enable the per-user CPU/memory gate
constrain_rate=True              # divisor for the per-user limits; non-numeric falls back to 2

# Succeeds when $1 is an enabled flag. Accepts shell-style "true" as well as
# the Python-style "True" used by the settings above.
_gpu_flag_on() {
  case "$1" in
    true|True|TRUE|1) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when $1 < $2, compared as decimals by bc; fails on an empty operand.
_gpu_lt() {
  [ -n "$1" ] && [ -n "$2" ] && [ "$(echo "$1 < $2" | bc)" = "1" ]
}

# Prints the mean of three readings of the command "$@", taken 1 s apart.
_gpu_mean_of_3() {
  local a b c
  a=$("$@"); sleep 1
  b=$("$@"); sleep 1
  c=$("$@")
  echo "scale=2; ($a+$b+$c)/3" | bc
}

# Prints the busy percentage (100 - last column) averaged over the per-CPU
# "Average:" rows of one mpstat run; prints nothing if no such row is found.
_gpu_cpu_busy() {
  mpstat -P ALL 1 1 \
    | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
           END { if (count) print total / count }'
}

# Prints column $1 of the "Mem:" row of `free -m` (2 = total, 3 = used).
_gpu_mem_field() {
  free -m | awk -v col="$1" '/Mem:/ { print $col }'
}

#######################################
# Blocks until the enabled resource gates pass, then selects the next GPU
# from available_devices (round-robin) that is not fully booked.
# Globals:
#   read:    constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#            memory_rate_max, username, username_mine (optional override),
#            available_devices, total_gpu_memory, max_gpu_memory_gap,
#            max_gpu_utilization
#   written: current_device_idx, device
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 once a device is selected, 1 if available_devices is empty
#######################################
update_device_idx() {
  local cpu_mean mem_used mem_total mem_rate
  local user rate cpu_limit mem_limit
  local cpu_sum cpu_capacity cpu_ratio mem_sum mem_ratio
  local dev used utilization

  if _gpu_flag_on "$constrain_total"; then
    # check total cpu usage
    while true; do
      cpu_mean=$(_gpu_mean_of_3 _gpu_cpu_busy)
      if _gpu_lt "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage
    while true; do
      mem_used=$(_gpu_mean_of_3 _gpu_mem_field 3)
      mem_total=$(_gpu_mem_field 2)
      # Multiply before dividing: with scale=2 the reverse order would
      # truncate the result to whole percent.
      mem_rate=$(echo "scale=2; $mem_used*100/$mem_total" | bc)
      if _gpu_lt "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_flag_on "$constrain_mine"; then
    # username_mine is an optional override; otherwise use the configured user.
    user=${username_mine:-$username}

    # The per-user limits are the global ceilings divided by constrain_rate.
    # A non-numeric or non-positive value cannot be a divisor; 2 matches the
    # "half of the given threshold" wording of the messages below.
    rate=$constrain_rate
    if ! [[ "$rate" =~ ^[0-9]+([.][0-9]+)?$ ]] || ! _gpu_lt 0 "$rate"; then
      echo "constrain_rate='$constrain_rate' is not a positive number, using 2" >&2
      rate=2
    fi
    cpu_limit=$(echo "$cpu_mean_max/$rate" | bc)
    mem_limit=$(echo "$memory_rate_max/$rate" | bc)

    while true; do
      # Sum of %CPU over the user's processes, relative to nproc * 100.
      cpu_sum=$(ps -u "$user" -o %cpu | awk '{ sum += $1 } END { print sum + 0 }')
      cpu_capacity=$(( $(nproc) * 100 ))
      cpu_ratio=$(echo "scale=2; $cpu_sum*100/$cpu_capacity" | bc)

      # Sum of RSS (KiB per ps) converted to MiB; printf avoids the exponent
      # notation awk's default output format uses for large numbers.
      mem_sum=$(ps -u "$user" -o rss | awk '{ sum += $1 } END { printf "%.2f\n", sum / 1024 }')
      mem_total=$(_gpu_mem_field 2)
      mem_ratio=$(echo "scale=2; $mem_sum*100/$mem_total" | bc)

      if _gpu_lt "$cpu_ratio" "$cpu_limit" && _gpu_lt "$mem_ratio" "$mem_limit"; then
        echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "available_devices is empty, no device can be selected" >&2
    return 1
  fi

  # All gates passed: advance round-robin until a device with headroom is found.
  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      current_device_idx=0
    fi
    dev=${available_devices[$current_device_idx]}

    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$dev")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$dev")
    used=${used//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    # A failed or non-numeric query must not count as a free device.
    if ! [[ "$used" =~ ^[0-9]+$ && "$utilization" =~ ^[0-9]+$ ]]; then
      echo "could not query device $dev with nvidia-smi, try next one" >&2
      sleep 3
      continue
    fi

    if (( used >= total_gpu_memory - max_gpu_memory_gap || utilization >= max_gpu_utilization )); then
      echo "device $dev is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $dev"
  device=$dev
}
|
res/20230615-17h53m44s/script.sh
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Runs single_user.py once for every poison method / dataset combination.
# Before each run, update_device_idx (defined in gpu_utility.sh) is called;
# the run executes in the foreground and the loop then pauses for $sleeptime
# seconds (also defined in gpu_utility.sh).

# Work from the directory that contains this script; stop if that fails, since
# gpu_utility.sh and single_user.py are addressed by relative path.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): the GPU is pinned here and the `device` variable that
# update_device_idx sets is not used by this script -- confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

  update_device_idx

  python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb

  sleep "$sleeptime"

#####

#####
done;done;
|
res/20230615-17h55m17s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings read by update_device_idx. This file is meant to be sourced.
username=yila22                  # account whose processes the per-user gate sums up
max_gpu_utilization=90           # a GPU at or above this utilization.gpu value is skipped
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000          # a GPU is skipped once used >= total_gpu_memory - gap
available_devices=( 0 1 2 3 4 )  # GPU ids tried in round-robin order
current_device_idx=-1            # index of the device handed out last; -1 = none yet
sleeptime=10                     # pause between launches, read by script.sh
cpu_mean_max=77                  # ceiling for the system-wide CPU busy percentage
memory_rate_max=80               # ceiling for the system-wide memory usage percentage
constrain_total=True             # enable the system-wide CPU/memory gate
constrain_mine=True              # enable the per-user CPU/memory gate
constrain_rate=True              # divisor for the per-user limits; non-numeric falls back to 2

# Succeeds when $1 is an enabled flag. Accepts shell-style "true" as well as
# the Python-style "True" used by the settings above.
_gpu_flag_on() {
  case "$1" in
    true|True|TRUE|1) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when $1 < $2, compared as decimals by bc; fails on an empty operand.
_gpu_lt() {
  [ -n "$1" ] && [ -n "$2" ] && [ "$(echo "$1 < $2" | bc)" = "1" ]
}

# Prints the mean of three readings of the command "$@", taken 1 s apart.
_gpu_mean_of_3() {
  local a b c
  a=$("$@"); sleep 1
  b=$("$@"); sleep 1
  c=$("$@")
  echo "scale=2; ($a+$b+$c)/3" | bc
}

# Prints the busy percentage (100 - last column) averaged over the per-CPU
# "Average:" rows of one mpstat run; prints nothing if no such row is found.
_gpu_cpu_busy() {
  mpstat -P ALL 1 1 \
    | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
           END { if (count) print total / count }'
}

# Prints column $1 of the "Mem:" row of `free -m` (2 = total, 3 = used).
_gpu_mem_field() {
  free -m | awk -v col="$1" '/Mem:/ { print $col }'
}

#######################################
# Blocks until the enabled resource gates pass, then selects the next GPU
# from available_devices (round-robin) that is not fully booked.
# Globals:
#   read:    constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#            memory_rate_max, username, username_mine (optional override),
#            available_devices, total_gpu_memory, max_gpu_memory_gap,
#            max_gpu_utilization
#   written: current_device_idx, device
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 once a device is selected, 1 if available_devices is empty
#######################################
update_device_idx() {
  local cpu_mean mem_used mem_total mem_rate
  local user rate cpu_limit mem_limit
  local cpu_sum cpu_capacity cpu_ratio mem_sum mem_ratio
  local dev used utilization

  if _gpu_flag_on "$constrain_total"; then
    # check total cpu usage
    while true; do
      cpu_mean=$(_gpu_mean_of_3 _gpu_cpu_busy)
      if _gpu_lt "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage
    while true; do
      mem_used=$(_gpu_mean_of_3 _gpu_mem_field 3)
      mem_total=$(_gpu_mem_field 2)
      # Multiply before dividing: with scale=2 the reverse order would
      # truncate the result to whole percent.
      mem_rate=$(echo "scale=2; $mem_used*100/$mem_total" | bc)
      if _gpu_lt "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_flag_on "$constrain_mine"; then
    # username_mine is an optional override; otherwise use the configured user.
    user=${username_mine:-$username}

    # The per-user limits are the global ceilings divided by constrain_rate.
    # A non-numeric or non-positive value cannot be a divisor; 2 matches the
    # "half of the given threshold" wording of the messages below.
    rate=$constrain_rate
    if ! [[ "$rate" =~ ^[0-9]+([.][0-9]+)?$ ]] || ! _gpu_lt 0 "$rate"; then
      echo "constrain_rate='$constrain_rate' is not a positive number, using 2" >&2
      rate=2
    fi
    cpu_limit=$(echo "$cpu_mean_max/$rate" | bc)
    mem_limit=$(echo "$memory_rate_max/$rate" | bc)

    while true; do
      # Sum of %CPU over the user's processes, relative to nproc * 100.
      cpu_sum=$(ps -u "$user" -o %cpu | awk '{ sum += $1 } END { print sum + 0 }')
      cpu_capacity=$(( $(nproc) * 100 ))
      cpu_ratio=$(echo "scale=2; $cpu_sum*100/$cpu_capacity" | bc)

      # Sum of RSS (KiB per ps) converted to MiB; printf avoids the exponent
      # notation awk's default output format uses for large numbers.
      mem_sum=$(ps -u "$user" -o rss | awk '{ sum += $1 } END { printf "%.2f\n", sum / 1024 }')
      mem_total=$(_gpu_mem_field 2)
      mem_ratio=$(echo "scale=2; $mem_sum*100/$mem_total" | bc)

      if _gpu_lt "$cpu_ratio" "$cpu_limit" && _gpu_lt "$mem_ratio" "$mem_limit"; then
        echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "available_devices is empty, no device can be selected" >&2
    return 1
  fi

  # All gates passed: advance round-robin until a device with headroom is found.
  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      current_device_idx=0
    fi
    dev=${available_devices[$current_device_idx]}

    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$dev")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$dev")
    used=${used//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    # A failed or non-numeric query must not count as a free device.
    if ! [[ "$used" =~ ^[0-9]+$ && "$utilization" =~ ^[0-9]+$ ]]; then
      echo "could not query device $dev with nvidia-smi, try next one" >&2
      sleep 3
      continue
    fi

    if (( used >= total_gpu_memory - max_gpu_memory_gap || utilization >= max_gpu_utilization )); then
      echo "device $dev is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $dev"
  device=$dev
}
|
res/20230615-17h55m17s/script.sh
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Runs single_user.py once for every poison method / dataset combination.
# Before each run, update_device_idx (defined in gpu_utility.sh) is called;
# the run executes in the foreground and the loop then pauses for $sleeptime
# seconds (also defined in gpu_utility.sh).

# Work from the directory that contains this script; stop if that fails, since
# gpu_utility.sh and single_user.py are addressed by relative path.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): the GPU is pinned here and the `device` variable that
# update_device_idx sets is not used by this script -- confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

  update_device_idx

  python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb

  sleep "$sleeptime"

#####

#####
done;done;
|
res/20230615-17h56m22s/gpu_utility.sh
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Settings read by update_device_idx. This file is meant to be sourced.
username=yila22                  # account whose processes the per-user gate sums up
max_gpu_utilization=90           # a GPU at or above this utilization.gpu value is skipped
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000          # a GPU is skipped once used >= total_gpu_memory - gap
available_devices=( 0 1 2 3 4 )  # GPU ids tried in round-robin order
current_device_idx=-1            # index of the device handed out last; -1 = none yet
sleeptime=10                     # pause between launches, read by script.sh
cpu_mean_max=77                  # ceiling for the system-wide CPU busy percentage
memory_rate_max=80               # ceiling for the system-wide memory usage percentage
constrain_total=True             # enable the system-wide CPU/memory gate
constrain_mine=False             # per-user CPU/memory gate is disabled
constrain_rate=2                 # divisor applied to the ceilings for the per-user limits

# Succeeds when $1 is an enabled flag. Accepts shell-style "true" as well as
# the Python-style "True" used by the settings above.
_gpu_flag_on() {
  case "$1" in
    true|True|TRUE|1) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when $1 < $2, compared as decimals by bc; fails on an empty operand.
_gpu_lt() {
  [ -n "$1" ] && [ -n "$2" ] && [ "$(echo "$1 < $2" | bc)" = "1" ]
}

# Prints the mean of three readings of the command "$@", taken 1 s apart.
_gpu_mean_of_3() {
  local a b c
  a=$("$@"); sleep 1
  b=$("$@"); sleep 1
  c=$("$@")
  echo "scale=2; ($a+$b+$c)/3" | bc
}

# Prints the busy percentage (100 - last column) averaged over the per-CPU
# "Average:" rows of one mpstat run; prints nothing if no such row is found.
_gpu_cpu_busy() {
  mpstat -P ALL 1 1 \
    | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
           END { if (count) print total / count }'
}

# Prints column $1 of the "Mem:" row of `free -m` (2 = total, 3 = used).
_gpu_mem_field() {
  free -m | awk -v col="$1" '/Mem:/ { print $col }'
}

#######################################
# Blocks until the enabled resource gates pass, then selects the next GPU
# from available_devices (round-robin) that is not fully booked.
# Globals:
#   read:    constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#            memory_rate_max, username, username_mine (optional override),
#            available_devices, total_gpu_memory, max_gpu_memory_gap,
#            max_gpu_utilization
#   written: current_device_idx, device
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 once a device is selected, 1 if available_devices is empty
#######################################
update_device_idx() {
  local cpu_mean mem_used mem_total mem_rate
  local user rate cpu_limit mem_limit
  local cpu_sum cpu_capacity cpu_ratio mem_sum mem_ratio
  local dev used utilization

  if _gpu_flag_on "$constrain_total"; then
    # check total cpu usage
    while true; do
      cpu_mean=$(_gpu_mean_of_3 _gpu_cpu_busy)
      if _gpu_lt "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage
    while true; do
      mem_used=$(_gpu_mean_of_3 _gpu_mem_field 3)
      mem_total=$(_gpu_mem_field 2)
      # Multiply before dividing: with scale=2 the reverse order would
      # truncate the result to whole percent.
      mem_rate=$(echo "scale=2; $mem_used*100/$mem_total" | bc)
      if _gpu_lt "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_flag_on "$constrain_mine"; then
    # username_mine is an optional override; otherwise use the configured user.
    user=${username_mine:-$username}

    # The per-user limits are the global ceilings divided by constrain_rate.
    # A non-numeric or non-positive value cannot be a divisor; 2 matches the
    # "half of the given threshold" wording of the messages below.
    rate=$constrain_rate
    if ! [[ "$rate" =~ ^[0-9]+([.][0-9]+)?$ ]] || ! _gpu_lt 0 "$rate"; then
      echo "constrain_rate='$constrain_rate' is not a positive number, using 2" >&2
      rate=2
    fi
    cpu_limit=$(echo "$cpu_mean_max/$rate" | bc)
    mem_limit=$(echo "$memory_rate_max/$rate" | bc)

    while true; do
      # Sum of %CPU over the user's processes, relative to nproc * 100.
      cpu_sum=$(ps -u "$user" -o %cpu | awk '{ sum += $1 } END { print sum + 0 }')
      cpu_capacity=$(( $(nproc) * 100 ))
      cpu_ratio=$(echo "scale=2; $cpu_sum*100/$cpu_capacity" | bc)

      # Sum of RSS (KiB per ps) converted to MiB; printf avoids the exponent
      # notation awk's default output format uses for large numbers.
      mem_sum=$(ps -u "$user" -o rss | awk '{ sum += $1 } END { printf "%.2f\n", sum / 1024 }')
      mem_total=$(_gpu_mem_field 2)
      mem_ratio=$(echo "scale=2; $mem_sum*100/$mem_total" | bc)

      if _gpu_lt "$cpu_ratio" "$cpu_limit" && _gpu_lt "$mem_ratio" "$mem_limit"; then
        echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  if [ "${#available_devices[@]}" -eq 0 ]; then
    echo "available_devices is empty, no device can be selected" >&2
    return 1
  fi

  # All gates passed: advance round-robin until a device with headroom is found.
  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
      current_device_idx=0
    fi
    dev=${available_devices[$current_device_idx]}

    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$dev")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$dev")
    used=${used//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    # A failed or non-numeric query must not count as a free device.
    if ! [[ "$used" =~ ^[0-9]+$ && "$utilization" =~ ^[0-9]+$ ]]; then
      echo "could not query device $dev with nvidia-smi, try next one" >&2
      sleep 3
      continue
    fi

    if (( used >= total_gpu_memory - max_gpu_memory_gap || utilization >= max_gpu_utilization )); then
      echo "device $dev is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $dev"
  device=$dev
}
|
res/20230615-17h56m22s/script.sh
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Runs single_user.py once for every poison method / dataset combination.
# Before each run, update_device_idx (defined in gpu_utility.sh) is called;
# the run executes in the foreground and the loop then pauses for $sleeptime
# seconds (also defined in gpu_utility.sh).

# Work from the directory that contains this script; stop if that fails, since
# gpu_utility.sh and single_user.py are addressed by relative path.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): the GPU is pinned here and the `device` variable that
# update_device_idx sets is not used by this script -- confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

  update_device_idx

  python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb

  sleep "$sleeptime"

#####

#####
done;done;
|