File size: 867 Bytes
bb48ea5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from typing import *
from loguru import logger
from tqdm import tqdm
import pandas as pd
import datetime

import multiprocessing
from multiprocessing import Pool
# Number of CPUs reported by the OS; used as the worker count for the
# process pool created in multiprocessing_mapping().
cpu_num = multiprocessing.cpu_count()
logger.info(f"cpu_num: {cpu_num}")


# Timestamp (naive local time) captured once when this module is imported.
# It is baked into the default `tmp_filepath` of multiprocessing_mapping(),
# so every call within one process shares the same default checkpoint file.
date_str = datetime.datetime.now().strftime("%Y%m%d_%Hh%Mm%Ss")


def multiprocessing_mapping(
    mapping_func,
    items: List[Any],
    batch_size=1000,
    tmp_filepath=f"./output/multiprocessing_mapping_{date_str}_tmp.xlsx",
):
    """Apply ``mapping_func`` to every item in parallel, checkpointing to Excel.

    ``items`` is processed in consecutive slices of ``batch_size``. Each slice
    is mapped across a pool of ``cpu_num`` worker processes, the results are
    appended to the running result list, and the whole accumulated list is
    rewritten to ``tmp_filepath`` so partial progress survives a crash.

    Args:
        mapping_func: Callable applied to each item. It is sent to worker
            processes, so it must be picklable (e.g. a module-level function).
        items: Sequence of inputs; must support ``len()`` and slicing.
        batch_size: Number of items handed to the pool between checkpoints.
            Must be at least 1.
        tmp_filepath: Destination of the checkpoint workbook. When it is a
            filesystem path, its parent directory is created if missing.

    Returns:
        The list of ``mapping_func`` results, in the same order as ``items``.
        Results are assumed to be row-like (e.g. dicts) so that
        ``pandas.DataFrame`` can tabulate them.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    import os

    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    total_rows: List[Dict[str, str]] = []
    if len(items) == 0:
        # Nothing to map: avoid spawning worker processes for no work.
        return total_rows

    # Make sure the checkpoint location exists before doing any expensive
    # work, otherwise the first batch would be computed and then lost.
    if isinstance(tmp_filepath, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(tmp_filepath))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    pool = Pool(processes=cpu_num)
    try:
        for i in tqdm(range(0, len(items), batch_size)):
            new_rows = pool.map(mapping_func, items[i:i + batch_size])
            total_rows.extend(new_rows)
            # Checkpoint: rewrite everything gathered so far after each batch.
            pd.DataFrame(total_rows).to_excel(tmp_filepath, index=False)
        pool.close()
    except BaseException:
        # Do not leave worker processes running if a batch or a write fails.
        pool.terminate()
        raise
    finally:
        # join() is only valid after close() or terminate(); one of them has
        # always been called by the time control reaches this point.
        pool.join()
    return total_rows