meg's picture
meg HF Staff
Experimenting.
0351a9f verified
raw
history blame
2.02 kB
#!/bin/bash
# `set -e` makes the shell exit as soon as a command returns an unhandled
# non-zero status.
# TODO: I think this flag is related to getting fail logs; check.
set -e

# Failure flag for the benchmarking step: 0 = no failure seen, 1 = failure.
failed=0

# Space identifier; exported so that child processes can read it as well.
SPACE="EnergyStarAI/launch-computation-example"
export SPACE

printf '%s\n' "Not checking h100 -- already know it's not there."
#python /check_h100.py
printf '%s\n' "Attempting to run."
#if [[ $? = 0 ]]; then
#######################################
# Run one benchmark per request line read from stdin.
# Each request line has the form "<backend_model>,<experiment_name>".
# Globals:
#   backend_model, experiment_name, now (written)
#   run_dir (written and exported)
# Outputs:
#   A progress line per request on stdout. The stderr of each benchmark is
#   redirected to "${run_dir}/error.log".
# Returns:
#   The exit status of the last benchmark command that ran, or 0 when no
#   request was read.
#######################################
run_benchmarks() {
  local line
  # Read the raw line first and split it in a second step: a `read` that
  # hits end-of-input would otherwise blank out backend_model and
  # experiment_name, which are still needed after the loop.
  while read -r line || [[ -n "${line}" ]]; do
    IFS="," read -r backend_model experiment_name <<< "${line}"
    echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
    now=$(date +%Y-%m-%d-%H-%M-%S)
    export run_dir="./runs/${experiment_name}/${backend_model}/${now}"
    mkdir -p "${run_dir}"
    # Let the benchmarking begin!
    optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log"
  done
}

# The requests are fed through process substitution rather than a pipeline:
# every stage of a pipeline runs in a subshell, so run_dir and backend_model
# set inside a piped loop would be gone by the time the code after the loop
# inspects them.
# NOTE: because the call is the left-hand side of `||`, `set -e` does not
# abort on a failing benchmark; only the status of the last request reaches
# the handler below.
run_benchmarks < <(python /parse_requests.py) || {
  echo "Error."
  failed=1
  # TODO: Although this works, `curl` appears to run indefinitely because it is recording itself (the logs are recording the curl operation.)
  #echo "Using curl to retrieve the space run log."
  #logs_name=./runs/logs-${now}.txt
  #curl https://api.hf.space/v1/${SPACE}/logs/run -H "Authorization: Bearer ${DEBUG}" > ${logs_name}
  #python /failed_run.py --run_dir $run_dir --model_name $backend_model --logs_name $logs_name
}
#######################################
# Decide how the benchmarking step ended and act on it.
# Globals:
#   run_dir (read)       - directory of the run to inspect; may be empty
#   backend_model (read) - model name passed on to /failed_run.py
#   failed (read)        - 1 when the benchmarking step reported a failure
# Outputs:
#   One status line on stdout.
# Returns:
#   Status of the last command run in the selected branch.
#######################################
report_outcome() {
  # An empty run_dir must not be expanded into the path "/error.log", so
  # require a non-empty value before looking at the log file.
  if [[ -n "${run_dir:-}" && -s "${run_dir}/error.log" ]]; then
    # error.log is not-empty, an error was raised
    echo "An error was raised while benchmarking the model..."
    python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model:-}"
    # TODO: Is this necessary?
    # Delete the current run directory so that it is not pushed by create_results.py later
    # `:?` aborts instead of running `rm -rf` with an empty path.
    rm -rf -- "${run_dir:?}"
  elif [[ "${failed:-0}" -eq 1 ]]; then
    echo "Failed, but was not able to retrieve error log."
  else
    # The error log file is empty, and we didn't catch an error.
    echo "Finished; uploading dataset results"
    python /create_results.py ./runs
  fi
}

report_outcome
# Pausing space
echo "Pausing space."
python /pause_space.py
echo "Done."