#!/bin/bash
# Batch-run sheet.py over a series of numbered CSV shards, pausing between
# runs to stay under the API rate limit.

set -u  # fail on unset variables (paths are built from them).
        # Deliberately no -e: one failed shard must not abort the rest,
        # matching the original keep-going behavior.

# Base name of the input files (pick one by uncommenting).
# base_file="output_0402_1_篩選結果.xlsx - Sheet1_"
# base_file="output_0402_2_篩選結果.xlsx - Sheet1_"
# base_file="output_0402_3_篩選結果.xlsx - Sheet1_"
base_file="output_0402_4_篩選結果.xlsx - Sheet1_"

# Shard index range to process (inclusive on both ends).
start_index=0
total_files=17

# Per-run consumption and API rate-limit settings.
# consumption_per_run=1000
# api_rate_limit=3000
api_rate_limit=20000
wait_time_in_seconds=60 # 1500 # 25 mins

# Process each shard in turn.
for (( i = start_index; i <= total_files; i++ )); do
  # Derive the per-shard input CSV and crawl-cache paths.
  file_name="${base_file}${i}.csv"
  crawled_file_path="${base_file}${i}/crawled_results.joblib"

  # Run the pipeline; warn on stderr (but keep going) if this shard fails.
  python sheet.py --data_path "data/production/${file_name}" --task new \
    --step crawl \
    --output_dir data/gpt-4o-mini \
    --n_processes 4 \
    --serp_provider serp \
    --crawled_file_path "${crawled_file_path}" \
    --extraction_provider openai \
    --extraction_model gpt-4o-mini \
    --regularization_provider openai \
    --regularization_model gpt-4o-mini \
    || printf 'WARNING: run for %s failed\n' "${file_name}" >&2

  # Pause between runs to avoid tripping the API rate limit.
  printf 'Completed task for %s. Waiting for %s seconds...\n' \
    "${file_name}" "${wait_time_in_seconds}"
  sleep "${wait_time_in_seconds}"
done

printf 'All tasks completed.\n'