Yixin Liu committed on
Commit
c4ef4f9
1 Parent(s): 23b9bfb

update readme

Browse files
Files changed (4) hide show
  1. Procfile +1 -0
  2. main.py +126 -0
  3. requirements.txt +3 -0
  4. setup.sh +13 -0
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: sh setup.sh && streamlit run main.py
main.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Exp Command Generator — a Streamlit app that emits a GPU-dispatching shell script.

The user supplies: environment-setup lines, hyperparameter assignments, and a
nested shell ``for`` loop wrapped around one ``python train.py`` command.  The
app assembles a script that, before each run, scans the selected GPUs for one
with enough free memory and low enough utilization (via ``nvidia-smi``), then
launches the command either serially (debug mode) or in the background with
``nohup``.
"""
import streamlit as st
import pandas as pd
import numpy as np
import time

# Page title.
st.title("Exp Command Generator")

# When checked, the generated script runs each command in the foreground
# (serially); otherwise commands are launched in the background via nohup.
debug = st.checkbox("Debug:选择则会串行地执行命令", value=True)

# Shell lines executed once at the top of the generated script
# (working directory, conda env activation, PYTHONPATH).
setup = st.text_area("Some setup of env at beginning.", """cd $(dirname $(dirname $0))
source activate xai
export PYTHONPATH=${PYTHONPATH}:/Users/apple/Desktop/workspace/research_project/attention:/mnt/yixin/:/home/yila22/prj""")

# Default hyperparameter assignments; referenced as $name by the main loop.
exp_hyper = st.text_area("Hyperparameters", """exp_name="debug-adv-training-emotion"
dataset=emotion
n_epoch=3
K=3
encoder=bert
lambda_1=1
lambda_2=1
x_pgd_radius=0.01
pgd_radius=0.001
seed=2
bsize=8
lr=5e-5""")

# --- GPU-related parameters ---------------------------------------------
gpu_list = st.multiselect("multi select", range(10), [1, 2, 3, 4, 5, 6, 7, 8, 9])
print(gpu_list)
allow_gpu_memory_threshold = st.number_input("最小单卡剩余容量", value=5000, min_value=0, max_value=30000, step=1000)
gpu_threshold = st.number_input("最大单卡利用率", value=70, min_value=0, max_value=100, step=10)
sleep_time_after_loading_task = st.number_input("加载任务后等待秒数", value=20, min_value=0, step=5)
all_full_sleep_time = st.number_input("全满之后等待秒数", value=20, min_value=0, step=5)

# Shell-variable block for the GPU settings.  The sleep values get an "s"
# (seconds) suffix — GNU `sleep` accepts suffixed durations.
gpu_list_str = ' '.join(str(i) for i in gpu_list)
gpu_hyper = (
    f"gpu=({gpu_list_str})\n"
    f"allow_gpu_memory_threshold={allow_gpu_memory_threshold}\n"
    f"gpu_threshold={gpu_threshold}\n"
    f"sleep_time_after_loading_task={sleep_time_after_loading_task}s\n"
    f"all_full_sleep_time={all_full_sleep_time}s\n"
    f"gpunum={len(gpu_list)}\n"
)

# Nested for-loops plus the training command.  Parsed below into the loop
# headers, the python command, and the closing `done;` sequence.
main_loop = st.text_area("Main loop", """for lambda_1 in 1 3;do
for lambda_2 in 1 10;do
for n_epoch in 3;do
for x_pgd_radius in 0.005 0.01;do
for pgd_radius in 0.0005 0.001 0.002;do
python train.py --dataset $dataset --data_dir . --output_dir ./outputs/ --attention tanh \\
--encoder $encoder \\
--exp_name $exp_name --lambda_1 $lambda_1 --lambda_2 $lambda_2 --pgd_radius $pgd_radius --x_pgd_radius $x_pgd_radius \\
--K $K --seed $seed --train_mode adv_train --bsize $bsize --n_epoch $n_epoch --lr $lr \\
--eval_baseline""")

# Everything before the first "python" is the block of for-loop headers.
hyper_loop = main_loop.split("python")[0]
print(hyper_loop)
# The command sits between the last ";do\n" and the first "done;".
# NOTE(review): this parsing assumes the exact ";do" / "done;" spelling used
# in the default template — a "; do" with a space would break it.
python_cmd = main_loop.split(";do\n")[-1].split('done;')[0]
print(python_cmd)
# One "done;" per loop-header line (each header line ends with a newline).
end_loop = "done;" * hyper_loop.count("\n")
print(end_loop)


g = st.button("Generate")
if g:
    # Assemble the script: setup, hyperparameters, GPU config, loop headers,
    # GPU-picking preamble, the command itself, then the `done;` terminators.
    s = ""
    s += setup + "\n\n"
    s += exp_hyper + "\n\n"
    s += gpu_hyper + "\n\n"
    s += hyper_loop + "\n\n"
    # GPU-picking preamble: round-robin over the configured GPUs until one
    # has enough free memory and acceptable utilization; if we wrap around
    # to index 0, everything is busy, so sleep and retry.
    s += """
i=0 # we search from the first gpu
while true; do
gpu_id=${gpu[$i]}
# nvidia-smi --query-gpu=utilization.gpu --format=csv -i 2 | grep -Eo "[0-9]+"
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
if [[ $free_mem -lt $allow_gpu_memory_threshold || $gpu_u -ge ${gpu_threshold} ]]; then
i=`expr $i + 1`
i=`expr $i % $gpunum`
echo "gpu id ${gpu[$i]} is full loaded, skip"
if [ "$i" == "0" ]; then
echo "all the gpus are full, sleep ${all_full_sleep_time}"
sleep ${all_full_sleep_time}
fi
else
break
fi
done

gpu_id=${gpu[$i]}
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
export CUDA_VISIBLE_DEVICES=$gpu_id
echo "use gpu id is ${gpu[$i]}, free memory is $free_mem, it utilization is ${gpu_u}%"
"""
    s += f"""com="{python_cmd}"\n"""
    s += "echo $com\n"
    s += "echo ==========================================================================================\n"
    if debug:
        # Serial execution: run the command in the foreground.
        s += "$com\n"
    else:
        # Background execution: log to a uniquely-named file, then wait a
        # little so the task has claimed GPU memory before the next scan.
        s += "mkdir -p ./logs/\n"
        s += "nohup $com > ./logs/$exp_name-$RANDOM.log 2>&1 &\n"
        s += """echo "sleep for $sleep_time_after_loading_task to wait the task loaded"
sleep $sleep_time_after_loading_task\n"""
    s += end_loop
    st.success("Finished")
    st.code(s, language="shell")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ numpy
2
+ streamlit
3
+ pandas
setup.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Configure Streamlit for platform deployment (e.g. Heroku): write the
# credentials and server config files.  $PORT is supplied by the platform.
mkdir -p ~/.streamlit/

# Heredocs instead of `echo "...\n..."`: echo's handling of backslash
# escapes is implementation-defined (dash interprets \n, bash's builtin
# does not without -e), which could leave literal "\n" in the TOML files.
cat > ~/.streamlit/credentials.toml <<EOF
[general]
email = "[email protected]"
EOF

cat > ~/.streamlit/config.toml <<EOF
[server]
headless = true
enableCORS=false
port = $PORT
EOF