sheetbot / scripts /run_regularization.sh
linpershey's picture
major release - add pipeline & batch for different use cases
07d2942
# Converts the regularization batch output of each numbered data directory
# into regularized results by running `python batch.py -t batch2reg`.
#
# Directories are suffixed with an index running from start_index to
# total_files, both inclusive. A failed conversion is reported on stderr and
# does not stop the remaining directories, but the script then exits with
# status 1 instead of claiming that everything completed.
start_index=0
total_files=0
wait_time_in_seconds=10
failed_count=0

# Plain POSIX counting loop: avoids depending on the external `seq` tool,
# which counts downwards on some platforms when start_index > total_files.
i=$start_index
while [ "$i" -le "$total_files" ]; do
  # Input selection: keep exactly one data_dir assignment enabled.
  # data_dir="data"/"gpt-4o-mini"/"output_0402_1_篩選結果.xlsx - Sheet1_$i"
  # data_dir="data"/"gpt-4o-mini"/"output_0402_2_篩選結果.xlsx - Sheet1_$i"
  data_dir="data"/"gpt-4o-mini"/"output_0402_3_篩選結果.xlsx - Sheet1_$i"
  # data_dir="data"/"gpt-4o-mini"/"output_0402_4_篩選結果.xlsx - Sheet1_$i"
  # data_dir="data/tmp"

  # Currently disabled steps, kept for reference.
  # echo "Preparing batch data ..."
  # python batch.py -t prepare_regularization \
  #   -erp "$data_dir"/extracted_results.joblib \
  #   -o "$data_dir"/regularization.jsonl \
  #   -topn 200

  # echo "Executing batch data ..."
  # python batch.py -t run_batch \
  #   -i "$data_dir"/regularization.jsonl \
  #   -j "$data_dir"/reg_job.joblib \
  #   -jp "$data_dir"/reg_output.jsonl

  # echo "Converting batch to extracted results ..."
  # python model.py -t batch2extract \
  #   -jp "$data_dir"/output.jsonl \
  #   -crp "$data_dir"/crawled_results.joblib \
  #   -erp "$data_dir"/extracted_results.joblib

  echo "Converting batch to regularized results ..."
  # Record a failure rather than ignoring it, so the final status is honest.
  if ! python batch.py -t batch2reg \
    -jp "$data_dir"/reg_output.jsonl \
    -erp "$data_dir"/extracted_results.joblib \
    -rrp "$data_dir"/regularized_results.joblib; then
    echo "batch2reg failed for: $data_dir" >&2
    failed_count=$((failed_count + 1))
  fi

  # Pause after each directory before moving on to the next one.
  sleep "$wait_time_in_seconds"
  i=$((i + 1))
done

if [ "$failed_count" -gt 0 ]; then
  echo "$failed_count task(s) failed." >&2
  exit 1
fi
echo "All tasks completed."