Yixin Liu committed on
Commit
c4ef4f9
1 Parent(s): 23b9bfb

update readme

Browse files
Files changed (4) hide show
  1. Procfile +1 -0
  2. main.py +126 -0
  3. requirements.txt +3 -0
  4. setup.sh +13 -0
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: sh setup.sh && streamlit run main.py
main.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Exp Command Generator — a Streamlit app that emits a GPU-dispatching shell script.

The user supplies: environment-setup lines, hyperparameter assignments, and a
nested shell ``for`` loop wrapped around one ``python train.py`` command.  The
app assembles a script that, before each run, scans the selected GPUs for one
with enough free memory and low enough utilization (via ``nvidia-smi``), then
launches the command either serially (debug mode) or in the background with
``nohup``.
"""
import streamlit as st
import pandas as pd
import numpy as np
import time

# Page title.
st.title("Exp Command Generator")

# When checked, the generated script runs each command in the foreground
# (serially); otherwise commands are launched in the background via nohup.
debug = st.checkbox("Debug:选择则会串行地执行命令", value=True)

# Shell lines executed once at the top of the generated script
# (working directory, conda env activation, PYTHONPATH).
setup = st.text_area("Some setup of env at beginning.", """cd $(dirname $(dirname $0))
source activate xai
export PYTHONPATH=${PYTHONPATH}:/Users/apple/Desktop/workspace/research_project/attention:/mnt/yixin/:/home/yila22/prj""")

# Default hyperparameter assignments; referenced as $name by the main loop.
exp_hyper = st.text_area("Hyperparameters", """exp_name="debug-adv-training-emotion"
dataset=emotion
n_epoch=3
K=3
encoder=bert
lambda_1=1
lambda_2=1
x_pgd_radius=0.01
pgd_radius=0.001
seed=2
bsize=8
lr=5e-5""")

# --- GPU-related parameters ---------------------------------------------
gpu_list = st.multiselect("multi select", range(10), [1, 2, 3, 4, 5, 6, 7, 8, 9])
print(gpu_list)
allow_gpu_memory_threshold = st.number_input("最小单卡剩余容量", value=5000, min_value=0, max_value=30000, step=1000)
gpu_threshold = st.number_input("最大单卡利用率", value=70, min_value=0, max_value=100, step=10)
sleep_time_after_loading_task = st.number_input("加载任务后等待秒数", value=20, min_value=0, step=5)
all_full_sleep_time = st.number_input("全满之后等待秒数", value=20, min_value=0, step=5)

# Shell-variable block for the GPU settings.  The sleep values get an "s"
# (seconds) suffix — GNU `sleep` accepts suffixed durations.
gpu_list_str = ' '.join(str(i) for i in gpu_list)
gpu_hyper = (
    f"gpu=({gpu_list_str})\n"
    f"allow_gpu_memory_threshold={allow_gpu_memory_threshold}\n"
    f"gpu_threshold={gpu_threshold}\n"
    f"sleep_time_after_loading_task={sleep_time_after_loading_task}s\n"
    f"all_full_sleep_time={all_full_sleep_time}s\n"
    f"gpunum={len(gpu_list)}\n"
)

# Nested for-loops plus the training command.  Parsed below into the loop
# headers, the python command, and the closing `done;` sequence.
main_loop = st.text_area("Main loop", """for lambda_1 in 1 3;do
for lambda_2 in 1 10;do
for n_epoch in 3;do
for x_pgd_radius in 0.005 0.01;do
for pgd_radius in 0.0005 0.001 0.002;do
python train.py --dataset $dataset --data_dir . --output_dir ./outputs/ --attention tanh \\
--encoder $encoder \\
--exp_name $exp_name --lambda_1 $lambda_1 --lambda_2 $lambda_2 --pgd_radius $pgd_radius --x_pgd_radius $x_pgd_radius \\
--K $K --seed $seed --train_mode adv_train --bsize $bsize --n_epoch $n_epoch --lr $lr \\
--eval_baseline""")

# Everything before the first "python" is the block of for-loop headers.
hyper_loop = main_loop.split("python")[0]
print(hyper_loop)
# The command sits between the last ";do\n" and the first "done;".
# NOTE(review): this parsing assumes the exact ";do" / "done;" spelling used
# in the default template — a "; do" with a space would break it.
python_cmd = main_loop.split(";do\n")[-1].split('done;')[0]
print(python_cmd)
# One "done;" per loop-header line (each header line ends with a newline).
end_loop = "done;" * hyper_loop.count("\n")
print(end_loop)


g = st.button("Generate")
if g:
    # Assemble the script: setup, hyperparameters, GPU config, loop headers,
    # GPU-picking preamble, the command itself, then the `done;` terminators.
    s = ""
    s += setup + "\n\n"
    s += exp_hyper + "\n\n"
    s += gpu_hyper + "\n\n"
    s += hyper_loop + "\n\n"
    # GPU-picking preamble: round-robin over the configured GPUs until one
    # has enough free memory and acceptable utilization; if we wrap around
    # to index 0, everything is busy, so sleep and retry.
    s += """
i=0 # we search from the first gpu
while true; do
gpu_id=${gpu[$i]}
# nvidia-smi --query-gpu=utilization.gpu --format=csv -i 2 | grep -Eo "[0-9]+"
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
if [[ $free_mem -lt $allow_gpu_memory_threshold || $gpu_u -ge ${gpu_threshold} ]]; then
i=`expr $i + 1`
i=`expr $i % $gpunum`
echo "gpu id ${gpu[$i]} is full loaded, skip"
if [ "$i" == "0" ]; then
echo "all the gpus are full, sleep ${all_full_sleep_time}"
sleep ${all_full_sleep_time}
fi
else
break
fi
done

gpu_id=${gpu[$i]}
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
export CUDA_VISIBLE_DEVICES=$gpu_id
echo "use gpu id is ${gpu[$i]}, free memory is $free_mem, it utilization is ${gpu_u}%"
"""
    s += f"""com="{python_cmd}"\n"""
    s += "echo $com\n"
    s += "echo ==========================================================================================\n"
    if debug:
        # Serial execution: run the command in the foreground.
        s += "$com\n"
    else:
        # Background execution: log to a uniquely-named file, then wait a
        # little so the task has claimed GPU memory before the next scan.
        s += "mkdir -p ./logs/\n"
        s += "nohup $com > ./logs/$exp_name-$RANDOM.log 2>&1 &\n"
        s += """echo "sleep for $sleep_time_after_loading_task to wait the task loaded"
sleep $sleep_time_after_loading_task\n"""
    s += end_loop
    st.success("Finished")
    st.code(s, language="shell")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ numpy
2
+ streamlit
3
+ pandas
setup.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Configure Streamlit for platform deployment (e.g. Heroku): write the
# credentials and server config files.  $PORT is supplied by the platform.
mkdir -p ~/.streamlit/

# Heredocs instead of `echo "...\n..."`: echo's handling of backslash
# escapes is implementation-defined (dash interprets \n, bash's builtin
# does not without -e), which could leave literal "\n" in the TOML files.
cat > ~/.streamlit/credentials.toml <<EOF
[general]
email = "[email protected]"
EOF

cat > ~/.streamlit/config.toml <<EOF
[server]
headless = true
enableCORS=false
port = $PORT
EOF