Spaces:
Runtime error
Runtime error
File size: 867 Bytes
bb48ea5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
from typing import *
from loguru import logger
from tqdm import tqdm
import pandas as pd
import datetime
import multiprocessing
from multiprocessing import Pool
# Number of CPUs reported by the OS; used below as the worker count of the
# process pool. Logged once when this module is imported.
cpu_num = multiprocessing.cpu_count()
logger.info(f"cpu_num: {cpu_num}")
# Timestamp captured once at import time (local, naive clock). It is embedded
# in the default checkpoint filename of multiprocessing_mapping, so every call
# made within the same process shares the same default path.
date_str = datetime.datetime.now().strftime("%Y%m%d_%Hh%Mm%Ss")
def multiprocessing_mapping(
    mapping_func,
    items: List[Any],
    batch_size=1000,
    tmp_filepath=f"./output/multiprocessing_mapping_{date_str}_tmp.xlsx",
):
    """Apply ``mapping_func`` to ``items`` in parallel, one batch at a time.

    ``items`` is cut into consecutive slices of ``batch_size`` elements. Each
    slice is dispatched to a pool of ``cpu_num`` worker processes with
    ``Pool.map``; after every batch, all rows collected so far are written to
    ``tmp_filepath`` as an Excel sheet so partial results survive a crash.

    Args:
        mapping_func: Callable applied to each item. It is sent to worker
            processes by ``Pool.map``, so it must be picklable (e.g. a
            module-level function).
        items: Input items; must support ``len()`` and slicing.
        batch_size: Number of items submitted to the pool per batch. Must be
            a positive integer.
        tmp_filepath: Path of the Excel checkpoint file, overwritten after
            every batch. Its parent directory is created if it is missing.

    Returns:
        The results of ``mapping_func`` for every item, in input order.
        NOTE(review): the annotation suggests each result is a
        ``Dict[str, str]`` row -- confirm against the callers.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    import os  # local import: the module does not import os at file level

    # A zero step already made range() raise; a negative step produced an
    # empty range, silently dropping every item. Reject both explicitly.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # The default checkpoint lives under ./output/, which may not exist yet;
    # create it up front instead of failing on the first checkpoint write.
    checkpoint_dir = os.path.dirname(tmp_filepath)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    total_rows: List[Dict[str, str]] = []
    # The context manager terminates the workers if a batch or a checkpoint
    # write raises, so a failure no longer leaks the worker processes.
    with Pool(processes=cpu_num) as pool:
        for start in tqdm(range(0, len(items), batch_size)):
            total_rows += pool.map(mapping_func, items[start:start + batch_size])
            # Checkpoint everything gathered so far (rewrites the whole file).
            pd.DataFrame(total_rows).to_excel(tmp_filepath, index=False)
        # Normal completion: shut the pool down gracefully, as before.
        pool.close()
        pool.join()
    return total_rows
|