# oh-my-words / common /util.py
# linxy's picture
# init
# bb48ea5
# raw
# history blame contribute delete
# 867 Bytes
from typing import *
from loguru import logger
from tqdm import tqdm
import pandas as pd
import datetime
import multiprocessing
from multiprocessing import Pool
# Number of CPUs reported by the OS; used as the worker-pool size below.
cpu_num = multiprocessing.cpu_count()
logger.info(f"cpu_num: {cpu_num}")

# Timestamp captured once at import time; it is baked into the default
# checkpoint filename of multiprocessing_mapping.
date_str = f"{datetime.datetime.now():%Y%m%d_%Hh%Mm%Ss}"
def multiprocessing_mapping(
    mapping_func: Callable[[Any], Any],
    items: List[Any],
    batch_size: int = 1000,
    tmp_filepath: str = f"./output/multiprocessing_mapping_{date_str}_tmp.xlsx",
) -> List[Dict[str, str]]:
    """Apply ``mapping_func`` to every item using a pool of worker processes.

    Items are processed in consecutive batches of ``batch_size``. After each
    batch, all rows collected so far are written to ``tmp_filepath`` as an
    Excel sheet, so partial results survive a crash in a later batch.

    Args:
        mapping_func: Callable applied to each item. It is sent to worker
            processes by ``Pool.map``, so it must be picklable (e.g. a
            module-level function).
        items: Inputs to map over; result order matches input order.
        batch_size: Number of items dispatched per ``Pool.map`` call. Must be
            at least 1.
        tmp_filepath: Destination of the checkpoint spreadsheet. The default
            is computed once at import time from ``date_str``.

    Returns:
        The list of values returned by ``mapping_func``, one per item.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    import os  # local import: the module header does not import os

    # Fail before spawning workers; a zero step would make range() raise and
    # a negative step would silently produce no batches at all.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Make sure the checkpoint directory exists up front, so the first
    # to_excel() call cannot fail after a whole batch has been computed.
    checkpoint_dir = os.path.dirname(tmp_filepath)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    total_rows: List[Dict[str, str]] = []
    pool = Pool(processes=cpu_num)
    try:
        for start in tqdm(range(0, len(items), batch_size)):
            batch = items[start:start + batch_size]
            total_rows.extend(pool.map(mapping_func, batch))
            # Checkpoint: rewrite the whole accumulated result each batch.
            pd.DataFrame(total_rows).to_excel(tmp_filepath, index=False)
    finally:
        # Always release the worker processes, even when a batch fails.
        pool.close()
        pool.join()
    return total_rows