Yixin Liu committed on
Commit
0fad117
1 Parent(s): 05e4fe8
Files changed (4) hide show
  1. demo.ipynb +358 -0
  2. gpu_utility.sh +118 -0
  3. output.sh +58 -0
  4. test.txt +49 -0
demo.ipynb ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "with open(\n",
10
+ " \"/Users/apple/Desktop/workspace/UsefulTool/exp-command-generator/test.txt\", 'r'\n",
11
+ ") as f:\n",
12
+ " contents = f.read()"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 5,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "# find all \"#####\" indexes\n",
22
+ "import re\n",
23
+ "indexes = [m.start() for m in re.finditer('#####', contents)]"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 6,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "assert len(indexes) % 2 == 0"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 7,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "# split to span\n",
42
+ "spans = []\n",
43
+ "# spans.append(contents[:indexes[0]])\n",
44
+ "for i in range(len(indexes)):\n",
45
+ " if i != len(indexes) - 1:\n",
46
+ " spans.append(contents[indexes[i]:indexes[i+1]])\n",
47
+ "# spans.append(contents[indexes[-1]:])"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 8,
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "spans_with_type = [\n",
57
+ " \n",
58
+ "]\n",
59
+ "for span in spans:\n",
60
+ " if \"setup\" in span:\n",
61
+ " spans_with_type.append((span, \"setup\"))\n",
62
+ " elif \"loop\" in span:\n",
63
+ " spans_with_type.append((span, \"loop\"))\n",
64
+ " elif \"main\" in span:\n",
65
+ " spans_with_type.append((span, \"command\"))\n",
66
+ " else:\n",
67
+ " spans_with_type.append((span, \"other\"))\n",
68
+ " "
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": 9,
74
+ "metadata": {},
75
+ "outputs": [
76
+ {
77
+ "data": {
78
+ "text/plain": [
79
+ "[('##### setup\\n#!/bin/bash\\nfile_name=$(basename $0)\\ncurrent_path=$(pwd)\\ncd /data/yixin/workspace/unl-graph-usenix\\nsource activate /data/yixin/anaconda/unlg\\ndatasets=(\"IMDB-BINARY\" \"MUTAG\" \"ENZYMES\" \"IMDB-MULTI\" )\\nmodels=( \"gcn\" \"gin\" \"sage\" )\\nentity=\"mib-nlp\"\\nexp_name=\"adv-run-v3\"\\nbatch_size=8\\nmethods=( \"clean\" \"rand\" \"feat\" \"grad\" \"inject\" \"adv\")\\nwd=1e-5\\nadv_train_budgets=( 0.07 0.09 0.11 )\\ngen_exp_name=\"main-results-v2\"\\nlr=0.01\\nes_patience=40\\nseed_default=0\\noptimizer=\"adam\"\\nbudget=0.05\\ntotal_epoch=300\\nmax_steps=5000\\nseeds=(\"402\")\\n# mkdir $current_path/logs/ if not exist\\nmkdir -p $current_path/logs/\\nmkdir -p $current_path/logs/$exp_name\\n',\n",
80
+ " 'setup'),\n",
81
+ " ('#####\\n\\n\\n\\n', 'other'),\n",
82
+ " ('##### loop\\nfor adv_train_budget in \"${adv_train_budgets[@]}\"; do\\nfor dataset in \"${datasets[@]}\"; do\\nfor model in \"${models[@]}\"; do\\nfor method in \"${methods[@]}\"; do\\n',\n",
83
+ " 'loop'),\n",
84
+ " ('##### \\n\\n ', 'other'),\n",
85
+ " ('##### main\\n comb_command=\"for seed in ${seeds[@]} ; do nohup python eval.py --dataset $dataset --model ${model} --method ${method} --lr $lr --exp_name $exp_name --entity $entity --batch_size $batch_size --seed \\\\$seed --early_stop --num_epochs $total_epoch --wd $wd --device $device --es_patience $es_patience --optimizer $optimizer --max_steps $max_steps --adv_train --adv_train_budget $adv_train_budget --gen_exp_name $gen_exp_name > $current_path/logs/$exp_name/$dataset.$model.$method-\\\\$seed-$RANDOM$RANDOM.log 2>&1 ; done; \"\\n eval $comb_command & \\n \\n ',\n",
86
+ " 'command'),\n",
87
+ " ('##### \\n\\n', 'other'),\n",
88
+ " ('##### \\ndone;\\ndone;\\ndone;\\ndone;\\n', 'other')]"
89
+ ]
90
+ },
91
+ "execution_count": 9,
92
+ "metadata": {},
93
+ "output_type": "execute_result"
94
+ }
95
+ ],
96
+ "source": [
97
+ "spans_with_type"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 10,
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "gpu_env = \"\"\"\n",
107
+ "username_mine=root\n",
108
+ "max_gpu_utilization=90\n",
109
+ "total_aviable=24564\n",
110
+ "max_gpu_memory_gap=5000\n",
111
+ "available_devices=( 0 1 2 3 4 5 6 7 8 9 )\n",
112
+ "current_device_idx=-1\n",
113
+ "sleeptime=30\n",
114
+ "cpu_mean_max=77\n",
115
+ "memory_rate_max=80\n",
116
+ "constrain_total=true\n",
117
+ "constrain_mine=false\n",
118
+ "constrain_rate=2\n",
119
+ "\"\"\""
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": 79,
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "update_device_func = \"\"\"\n",
129
+ "function update_device_idx {\n",
130
+ " sleep $sleeptime\n",
131
+ " if [ $constrain_total = true ]; then\n",
132
+ " # check total cpu usage\n",
133
+ " while true; do\n",
134
+ " cpu_mean_1=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')\n",
135
+ " sleep 1\n",
136
+ " cpu_mean_2=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')\n",
137
+ " sleep 1\n",
138
+ " cpu_mean_3=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')\n",
139
+ " cpu_mean=$(echo \"scale=2; ($cpu_mean_1+$cpu_mean_2+$cpu_mean_3)/3\" | bc)\n",
140
+ "\n",
141
+ " # if currently cpu usage is less than the threshold, then break\n",
142
+ " if [ $(echo \"$cpu_mean < $cpu_mean_max\" | bc) -eq 1 ]; then\n",
143
+ " echo \"total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage\"\n",
144
+ " break\n",
145
+ " else\n",
146
+ " echo \"total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds\"\n",
147
+ " sleep 10\n",
148
+ " fi\n",
149
+ " done;\n",
150
+ "\n",
151
+ " # check total memory usage\n",
152
+ " while true; do\n",
153
+ " # get memory usage of whole system\n",
154
+ " mem_used_1=$(free -m | awk '/Mem:/ {print $3}')\n",
155
+ " sleep 1\n",
156
+ " mem_used_2=$(free -m | awk '/Mem:/ {print $3}')\n",
157
+ " sleep 1\n",
158
+ " mem_used_3=$(free -m | awk '/Mem:/ {print $3}')\n",
159
+ " mem_used=$(echo \"scale=2; ($mem_used_1+$mem_used_2+$mem_used_3)/3\" | bc)\n",
160
+ " \n",
161
+ " # echo $mem_used\n",
162
+ " # get rate of memory usage\n",
163
+ " mem_rate=$(echo \"scale=2; $mem_used/$(free -m | awk '/Mem:/ {print $2}')*100\" | bc)\n",
164
+ " # echo $mem_rate\n",
165
+ " if [ $(echo \"$mem_rate < $memory_rate_max\" | bc) -eq 1 ]; then\n",
166
+ " echo \"total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage\"\n",
167
+ " break\n",
168
+ " else\n",
169
+ " echo \"total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds\"\n",
170
+ " sleep 10\n",
171
+ " fi\n",
172
+ " done;\n",
173
+ " fi;\n",
174
+ "\n",
175
+ " # if constrain_mine\n",
176
+ " if [ $constrain_mine = true ]; then\n",
177
+ "\n",
178
+ " # check my own cpu and memory usage, it should be less than 1/$constrain_rate of the given cpu_mean_max / memory_rate_max\n",
179
+ " while true; do\n",
180
+ " username=$username_mine\n",
181
+ " cpu_usage_user_sum=$(ps -u $username -o %cpu | awk '{sum+=$1} END {print sum}')\n",
182
+ " # echo $cpu_usage_user_sum\n",
183
+ " total_aviable_cpu=$(nproc)\n",
184
+ " total_aviable_cpu=$(echo \"$total_aviable_cpu*100\" | bc)\n",
185
+ " # echo $total_aviable_cpu\n",
186
+ " cpu_usage_user_ratio=$(echo \"scale=2; $cpu_usage_user_sum/$total_aviable_cpu*100\" | bc)\n",
187
+ " # echo $cpu_usage_user_ratio\n",
188
+ "\n",
189
+ " memory_usage_user_sum=$(ps -u $username -o rss | awk '{sum+=$1} END {print sum/1024}')\n",
190
+ " # echo $memory_usage_user_sum\n",
191
+ " memory_usage_total=$(free -m | awk '/Mem:/ {print $2}')\n",
192
+ " # echo $memory_usage_total\n",
193
+ " memory_usage_user_ratio=$(echo \"scale=2; $memory_usage_user_sum/$memory_usage_total*100\" | bc)\n",
194
+ " # echo $memory_usage_user_ratio\n",
195
+ "\n",
196
+ " # so my ratio should be less than 1/$constrain_rate of the given threshold\n",
197
+ " cpu_mean_max_mine=$(echo \"$cpu_mean_max/$constrain_rate\" | bc)\n",
198
+ " memory_rate_max_mine=$(echo \"$memory_rate_max/$constrain_rate\" | bc)\n",
199
+ " if [ $(echo \"$cpu_usage_user_ratio < $cpu_mean_max_mine\" | bc) -eq 1 ] && [ $(echo \"$memory_usage_user_ratio < $memory_rate_max_mine\" | bc) -eq 1 ]; then\n",
200
+ " echo \"my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off\"\n",
201
+ " break\n",
202
+ " else\n",
203
+ " echo \"my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds\"\n",
204
+ " sleep 10\n",
205
+ " fi\n",
206
+ " done;\n",
207
+ " fi;\n",
208
+ "\n",
209
+ " # so all the conditions are satisfied, we can update the device idx and run the next experiment\n",
210
+ " while true; do\n",
211
+ " current_device_idx=$((current_device_idx+1))\n",
212
+ " if [ $current_device_idx -ge ${#available_devices[@]} ]; then\n",
213
+ " # reset \n",
214
+ " current_device_idx=0\n",
215
+ " fi\n",
216
+ " # check whether this device is fully booked using nvidia-smi\n",
217
+ " # get the gpu current memory usage \n",
218
+ " useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})\n",
219
+ " utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})\n",
220
+ " \n",
221
+ " if [ $useage -ge $((total_aviable-max_gpu_memory_gap)) ] || [ $utilization -ge $max_gpu_utilization ]; then\n",
222
+ " echo \"device ${available_devices[$current_device_idx]} is fully booked, try next one\"\n",
223
+ " sleep 3\n",
224
+ " continue\n",
225
+ " else\n",
226
+ " break\n",
227
+ " fi\n",
228
+ " done\n",
229
+ " echo \"current device: ${available_devices[$current_device_idx]}\"\n",
230
+ " device=${available_devices[$current_device_idx]}\n",
231
+ "}\n",
232
+ "\"\"\""
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 80,
238
+ "metadata": {},
239
+ "outputs": [],
240
+ "source": [
241
+ "update_device_command = \"update_device_idx;\\n\""
242
+ ]
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": null,
247
+ "metadata": {},
248
+ "outputs": [],
249
+ "source": []
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": 81,
254
+ "metadata": {},
255
+ "outputs": [],
256
+ "source": [
257
+ "backend_run = False"
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "execution_count": 82,
263
+ "metadata": {},
264
+ "outputs": [],
265
+ "source": [
266
+ "gpu_utility = \"\"\n",
267
+ "gpu_utility = gpu_env + \"\\n\\n\" + update_device_func \n",
268
+ "with open(\"gpu_utility.sh\", 'w') as f:\n",
269
+ " f.write(gpu_utility)"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": 83,
275
+ "metadata": {},
276
+ "outputs": [],
277
+ "source": [
278
+ "spans_with_type_added_device_control = []\n",
279
+ "\n",
280
+ "for span, type_ in spans_with_type:\n",
281
+ " if type_ == \"setup\":\n",
282
+ " spans_with_type_added_device_control.append((\n",
283
+ " \"\"\"cd $(cd \"$(dirname \"$0\")\";pwd); source gpu_utility.sh\\n\\n\"\"\"\n",
284
+ " , \"device_control\"))\n",
285
+ " spans_with_type_added_device_control.append((span, type_))\n",
286
+ " # spans_with_type_added_device_control.append((gpu_env, \"device_control\"))\n",
287
+ " # spans_with_type_added_device_control.append((update_device_func, \"device_control\"))\n",
288
+ " elif type_ == \"loop\":\n",
289
+ " spans_with_type_added_device_control.append((span, type_))\n",
290
+ " elif type_ == \"command\":\n",
291
+ " spans_with_type_added_device_control.append((\"\\n\"+update_device_command, \"device_control\"))\n",
292
+ " span_remove_the_first_part = span[span.index(\"\\n\"):]\n",
293
+ " spans_with_type_added_device_control.append((f\"\\n\\ncommand=\\\"\\\"\\\"{span_remove_the_first_part}\\\"\\\"\\\"\\n\", type_))\n",
294
+ " run_command = \"eval $command\"\n",
295
+ " if backend_run:\n",
296
+ " run_command += \" &\"\n",
297
+ " run_command += \"\\n\\n\\n\"\n",
298
+ " spans_with_type_added_device_control.append((run_command, type_))\n",
299
+ " else:\n",
300
+ " spans_with_type_added_device_control.append((span, type_))\n",
301
+ "spans_without_type = [span for span, type_ in spans_with_type_added_device_control]\n",
302
+ "spans_without_type_str = \"\".join(spans_without_type)\n",
303
+ "with open(\"./output.sh\", 'w') as f:\n",
304
+ " f.write(spans_without_type_str)"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": null,
310
+ "metadata": {},
311
+ "outputs": [],
312
+ "source": []
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": null,
317
+ "metadata": {},
318
+ "outputs": [],
319
+ "source": []
320
+ },
321
+ {
322
+ "cell_type": "code",
323
+ "execution_count": null,
324
+ "metadata": {},
325
+ "outputs": [],
326
+ "source": []
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": null,
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": []
334
+ }
335
+ ],
336
+ "metadata": {
337
+ "kernelspec": {
338
+ "display_name": "base",
339
+ "language": "python",
340
+ "name": "python3"
341
+ },
342
+ "language_info": {
343
+ "codemirror_mode": {
344
+ "name": "ipython",
345
+ "version": 3
346
+ },
347
+ "file_extension": ".py",
348
+ "mimetype": "text/x-python",
349
+ "name": "python",
350
+ "nbconvert_exporter": "python",
351
+ "pygments_lexer": "ipython3",
352
+ "version": "3.9.13"
353
+ },
354
+ "orig_nbformat": 4
355
+ },
356
+ "nbformat": 4,
357
+ "nbformat_minor": 2
358
+ }
gpu_utility.sh ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# gpu_utility.sh -- resource-aware GPU picker. Meant to be *sourced* by an
# experiment launcher, which then calls `update_device_idx` before each job
# and reads the chosen GPU id from the global `device`.
#
# Settings are plain globals so the sourcing script can reassign them after
# `source gpu_utility.sh`.
username_mine=root          # user whose processes are summed by the constrain_mine check
max_gpu_utilization=90      # a GPU at or above this utilization.gpu value counts as booked
total_aviable=24564         # memory budget compared against nvidia-smi memory.used
max_gpu_memory_gap=5000     # a GPU counts as booked once memory.used >= total_aviable - gap
available_devices=( 0 1 2 3 4 5 6 7 8 9 )  # ids handed to `nvidia-smi -i`
current_device_idx=-1       # index of the last chosen entry; -1 so the first call starts at 0
sleeptime=30                # seconds slept at the start of every update_device_idx call
cpu_mean_max=77             # system-wide mean CPU usage (%) that must not be reached
memory_rate_max=80          # system-wide memory usage (%) that must not be reached
constrain_total=true        # enable the system-wide CPU / memory gates
constrain_mine=false        # enable the per-user CPU / memory gate
constrain_rate=2            # per-user limits are the system-wide limits divided by this

# Print the mean CPU usage (percent, over all CPUs) of one 1-second mpstat
# sample. Prints nothing when mpstat produced no per-CPU "Average:" rows, so
# the caller can detect the failure instead of awk dividing by zero.
_gpu_utility_cpu_sample() {
  mpstat -P ALL 1 1 \
    | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
           END { if (count) print total / count }'
}

# Succeed when $1 < $2. bc is used because the operands may be fractional.
_gpu_utility_lt() {
  [[ "$(echo "$1 < $2" | bc)" == 1 ]]
}

#######################################
# Block until the machine has headroom, then pick the next free GPU
# round-robin from available_devices.
# Globals:
#   read:    sleeptime, constrain_total, constrain_mine, constrain_rate,
#            cpu_mean_max, memory_rate_max, username_mine, available_devices,
#            total_aviable, max_gpu_memory_gap, max_gpu_utilization
#   written: current_device_idx, device
# Arguments: none
# Outputs:   progress messages on stdout, query problems on stderr
# Returns:   0 once a device has been selected (waits indefinitely otherwise)
#######################################
update_device_idx() {
  # Temporaries are local so they do not leak into the sourcing script.
  local cpu_mean_1 cpu_mean_2 cpu_mean_3 cpu_mean
  local mem_used_1 mem_used_2 mem_used_3 mem_used mem_total mem_rate
  local username cpu_usage_user_sum total_aviable_cpu cpu_usage_user_ratio
  local memory_usage_user_sum memory_usage_total memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local gpu_id useage utilization

  # Same throttle the generator's copy of this function starts with; without
  # it the `sleeptime` setting has no effect.
  sleep "$sleeptime"

  if [[ "$constrain_total" == true ]]; then
    # check total cpu usage (mean of three samples)
    while true; do
      cpu_mean_1=$(_gpu_utility_cpu_sample)
      sleep 1
      cpu_mean_2=$(_gpu_utility_cpu_sample)
      sleep 1
      cpu_mean_3=$(_gpu_utility_cpu_sample)
      if [[ -z "$cpu_mean_1" || -z "$cpu_mean_2" || -z "$cpu_mean_3" ]]; then
        echo "could not read cpu usage from mpstat, sleep 10 seconds" >&2
        sleep 10
        continue
      fi
      cpu_mean=$(echo "scale=2; ($cpu_mean_1+$cpu_mean_2+$cpu_mean_3)/3" | bc)

      # if currently cpu usage is less than the threshold, then break
      if _gpu_utility_lt "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      else
        echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
        sleep 10
      fi
    done

    # check total memory usage (mean of three samples of the "used" column)
    while true; do
      mem_used_1=$(free -m | awk '/Mem:/ {print $3}')
      sleep 1
      mem_used_2=$(free -m | awk '/Mem:/ {print $3}')
      sleep 1
      mem_used_3=$(free -m | awk '/Mem:/ {print $3}')
      mem_used=$(echo "scale=2; ($mem_used_1+$mem_used_2+$mem_used_3)/3" | bc)

      # Multiply before dividing: with scale=2, used/total*100 would truncate
      # the quotient first and only ever yield whole percentages.
      mem_total=$(free -m | awk '/Mem:/ {print $2}')
      mem_rate=$(echo "scale=2; $mem_used*100/$mem_total" | bc)
      if _gpu_utility_lt "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      else
        echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  if [[ "$constrain_mine" == true ]]; then
    # My own cpu and memory usage must stay below 1/$constrain_rate of the
    # system-wide thresholds.
    while true; do
      username=$username_mine
      # %cpu summed over the user's processes; the header row adds 0 in awk.
      cpu_usage_user_sum=$(ps -u "$username" -o %cpu | awk '{sum+=$1} END {print sum+0}')
      total_aviable_cpu=$(( $(nproc) * 100 ))
      cpu_usage_user_ratio=$(echo "scale=2; $cpu_usage_user_sum*100/$total_aviable_cpu" | bc)

      # rss is summed and divided by 1024 so it can be compared with `free -m`.
      memory_usage_user_sum=$(ps -u "$username" -o rss | awk '{sum+=$1} END {print sum/1024}')
      memory_usage_total=$(free -m | awk '/Mem:/ {print $2}')
      memory_usage_user_ratio=$(echo "scale=2; $memory_usage_user_sum*100/$memory_usage_total" | bc)

      cpu_mean_max_mine=$(echo "$cpu_mean_max/$constrain_rate" | bc)
      memory_rate_max_mine=$(echo "$memory_rate_max/$constrain_rate" | bc)
      if _gpu_utility_lt "$cpu_usage_user_ratio" "$cpu_mean_max_mine" \
        && _gpu_utility_lt "$memory_usage_user_ratio" "$memory_rate_max_mine"; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than 1/$constrain_rate of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      else
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than 1/$constrain_rate of the given threshold, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # All gates passed: advance round-robin until a GPU with headroom is found.
  while true; do
    current_device_idx=$((current_device_idx + 1))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # wrap around
      current_device_idx=0
    fi
    gpu_id=${available_devices[$current_device_idx]}

    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu_id")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu_id")
    useage=${useage//[[:space:]]/}
    utilization=${utilization//[[:space:]]/}

    # A failed query (empty output, or an error text such as a missing
    # device) must not be mistaken for a free GPU, so skip that device.
    if ! [[ "$useage" =~ ^[0-9]+$ ]]; then
      echo "device $gpu_id could not be queried with nvidia-smi, try next one" >&2
      sleep 3
      continue
    fi
    # If only the utilisation figure is unreadable, decide on memory alone.
    if ! [[ "$utilization" =~ ^[0-9]+$ ]]; then
      utilization=0
    fi

    if (( useage >= total_aviable - max_gpu_memory_gap || utilization >= max_gpu_utilization )); then
      echo "device $gpu_id is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done
  echo "current device: ${available_devices[$current_device_idx]}"
  device=${available_devices[$current_device_idx]}
}
output.sh ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Experiment launcher: sweeps adv_train_budget x dataset x model x method and
# starts one background eval.py run per combination on a GPU chosen by
# update_device_idx (defined in gpu_utility.sh next to this script).

# Run from the script's own directory so gpu_utility.sh and logs/ resolve.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh

##### setup
file_name=$(basename "$0")
current_path=$(pwd)
cd /data/yixin/workspace/unl-graph-usenix || exit 1
source activate /data/yixin/anaconda/unlg
datasets=("IMDB-BINARY" "MUTAG" "ENZYMES" "IMDB-MULTI" )
models=( "gcn" "gin" "sage" )
entity="mib-nlp"
exp_name="adv-run-v3"
batch_size=8
methods=( "clean" "rand" "feat" "grad" "inject" "adv")
wd=1e-5
adv_train_budgets=( 0.07 0.09 0.11 )
gen_exp_name="main-results-v2"
lr=0.01
es_patience=40
seed_default=0
optimizer="adam"
budget=0.05
total_epoch=300
max_steps=5000
seeds=("402")
# create the log directory (and its parent) if it does not exist yet
mkdir -p "$current_path/logs/$exp_name"
#####

##### loop
for adv_train_budget in "${adv_train_budgets[@]}"; do
  for dataset in "${datasets[@]}"; do
    for model in "${models[@]}"; do
      for method in "${methods[@]}"; do
        #####

        # Blocks until resources are free, then sets $device.
        update_device_idx

        # Run all seeds for this configuration sequentially inside one
        # background subshell. This replaces a command="""...""" wrapper:
        # triple quotes are not Bash syntax, so the string ended early and
        # the `for ... do` text was parsed as live shell (syntax error).
        (
          for seed in "${seeds[@]}"; do
            nohup python eval.py \
              --dataset "$dataset" --model "$model" --method "$method" \
              --lr "$lr" --exp_name "$exp_name" --entity "$entity" \
              --batch_size "$batch_size" --seed "$seed" --early_stop \
              --num_epochs "$total_epoch" --wd "$wd" --device "$device" \
              --es_patience "$es_patience" --optimizer "$optimizer" \
              --max_steps "$max_steps" --adv_train \
              --adv_train_budget "$adv_train_budget" \
              --gen_exp_name "$gen_exp_name" \
              > "$current_path/logs/$exp_name/$dataset.$model.$method-$seed-$RANDOM$RANDOM.log" 2>&1
          done
        ) &

        #####
      done
    done
  done
done
test.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##### setup
2
+ #!/bin/bash
3
+ file_name=$(basename $0)
4
+ current_path=$(pwd)
5
+ cd /data/yixin/workspace/unl-graph-usenix
6
+ source activate /data/yixin/anaconda/unlg
7
+ datasets=("IMDB-BINARY" "MUTAG" "ENZYMES" "IMDB-MULTI" )
8
+ models=( "gcn" "gin" "sage" )
9
+ entity="mib-nlp"
10
+ exp_name="adv-run-v3"
11
+ batch_size=8
12
+ methods=( "clean" "rand" "feat" "grad" "inject" "adv")
13
+ wd=1e-5
14
+ adv_train_budgets=( 0.07 0.09 0.11 )
15
+ gen_exp_name="main-results-v2"
16
+ lr=0.01
17
+ es_patience=40
18
+ seed_default=0
19
+ optimizer="adam"
20
+ budget=0.05
21
+ total_epoch=300
22
+ max_steps=5000
23
+ seeds=("402")
24
+ # mkdir $current_path/logs/ if not exist
25
+ mkdir -p $current_path/logs/
26
+ mkdir -p $current_path/logs/$exp_name
27
+ #####
28
+
29
+
30
+
31
+ ##### loop
32
+ for adv_train_budget in "${adv_train_budgets[@]}"; do
33
+ for dataset in "${datasets[@]}"; do
34
+ for model in "${models[@]}"; do
35
+ for method in "${methods[@]}"; do
36
+ #####
37
+
38
+ ##### main
39
+ comb_command="for seed in ${seeds[@]} ; do nohup python eval.py --dataset $dataset --model ${model} --method ${method} --lr $lr --exp_name $exp_name --entity $entity --batch_size $batch_size --seed \$seed --early_stop --num_epochs $total_epoch --wd $wd --device $device --es_patience $es_patience --optimizer $optimizer --max_steps $max_steps --adv_train --adv_train_budget $adv_train_budget --gen_exp_name $gen_exp_name > $current_path/logs/$exp_name/$dataset.$model.$method-\$seed-$RANDOM$RANDOM.log 2>&1 ; done; "
40
+ eval $comb_command &
41
+
42
+ #####
43
+
44
+ #####
45
+ done;
46
+ done;
47
+ done;
48
+ done;
49
+ #####