sheetbot / scripts /run_batch.sh
linpershey's picture
major release - add pipeline & batch for different use cases
07d2942
# Runs the three batch.py stages (prepare_batch -> run_batch -> batch2extract)
# for a numbered series of data directories, pausing between directories.
#
# Each stage consumes a file written by the previous one, so a directory's
# pipeline stops at its first failing stage instead of running later stages
# against missing or stale files. A failure in one directory does not stop
# the remaining directories; the script exits non-zero if any directory failed.
#
# batch.py is invoked by relative path, so run this script from the directory
# that contains batch.py.

start_index=0
# NOTE(review): the loop below is inclusive at both ends, so with
# start_index=0 and total_files=17 it visits 18 directories (suffixes _0
# through _17). Confirm that this is intended and not an off-by-one.
total_files=17
wait_time_in_seconds=10

#######################################
# Runs the three batch.py stages for a single data directory.
# Arguments: $1 - data directory holding crawled_results.joblib
# Outputs:   progress messages on stdout
# Returns:   0 if every stage succeeded, 1 at the first stage that failed
#######################################
run_pipeline() {
  pipeline_dir=$1

  echo "Preparing batch data ..."
  # An optional "-topn 200" flag was left disabled by the original author.
  python batch.py -t prepare_batch \
    -crp "$pipeline_dir"/crawled_results.joblib \
    -o "$pipeline_dir"/batch.jsonl || return 1

  echo "Executing batch data ..."
  python batch.py -t run_batch \
    -i "$pipeline_dir"/batch.jsonl \
    -j "$pipeline_dir"/job.joblib \
    -jp "$pipeline_dir"/output.jsonl || return 1

  echo "Converting batch to extracted results ..."
  python batch.py -t batch2extract \
    -jp "$pipeline_dir"/output.jsonl \
    -crp "$pipeline_dir"/crawled_results.joblib \
    -erp "$pipeline_dir"/extracted_results.joblib || return 1
}

failed_count=0
i=$start_index
while [ "$i" -le "$total_files" ]; do
  # Alternative inputs; enable exactly one data_dir assignment.
  # data_dir="data/gpt-4o-mini/output_0402_1_篩選結果.xlsx - Sheet1_$i"
  # data_dir="data/gpt-4o-mini/output_0402_2_篩選結果.xlsx - Sheet1_$i"
  # data_dir="data/gpt-4o-mini/output_0402_3_篩選結果.xlsx - Sheet1_$i"
  data_dir="data/gpt-4o-mini/output_0402_4_篩選結果.xlsx - Sheet1_$i"
  # data_dir="data/tmp"

  if ! run_pipeline "$data_dir"; then
    echo "Pipeline failed for: $data_dir" >&2
    failed_count=$((failed_count + 1))
  fi

  # Pause between directories; nothing follows the last one, so skip it there.
  if [ "$i" -lt "$total_files" ]; then
    sleep "$wait_time_in_seconds"
  fi
  i=$((i + 1))
done

if [ "$failed_count" -gt 0 ]; then
  echo "$failed_count director(y/ies) failed; see messages above." >&2
  exit 1
fi
echo "All tasks completed."