import os
import random
from multiprocessing import Pool, cpu_count

import pandas as pd
from PIL import Image, ImageOps
from tqdm import tqdm

# Read the CSV file
csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv'
data = pd.read_csv(csv_path)

# Set the root directory
root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300'

# Filter rows where poisonous == 1
poisonous_data = data[data['poisonous'] == 1]

# DataFrame that will hold the augmented dataset, starting from the original rows
new_data = data.copy()


# Augmentation worker: generates random crops plus rotations/flips for one image
def augment_image(args):
    row, root_dir = args
    image_path = row['image_path']
    full_path = os.path.join(root_dir, image_path)
    augmented_rows = []
    if os.path.exists(full_path):
        # Convert to RGB so every result can be saved as JPEG
        image = Image.open(full_path).convert('RGB')
        w, h = image.size

        # Rotation and flip transforms
        transformations = {
            'r90': image.rotate(90, expand=True),
            'r180': image.rotate(180, expand=True),
            'r270': image.rotate(270, expand=True),
            'fh': ImageOps.mirror(image),
            'fv': ImageOps.flip(image),
        }

        # Random crops: 4 crops at 70-80% of the original size
        for i in range(4):
            rand = random.uniform(0.7, 0.8)
            new_w = int(w * rand)
            new_h = int(h * rand)
            left = random.randint(0, w - new_w)
            top = random.randint(0, h - new_h)
            cropped_image = image.crop((left, top, left + new_w, top + new_h))
            # cropped_image = cropped_image.resize((w, h))  # resize back to the original size
            # Use the loop index in the filename so crops get unique, stable
            # names (the original embedded the raw random float in the name)
            new_image_path = os.path.splitext(image_path)[0] + f'_crop{i}.JPG'
            new_full_path = os.path.join(root_dir, new_image_path)
            cropped_image.save(new_full_path)
            new_row = row.copy()
            new_row['image_path'] = new_image_path
            augmented_rows.append(new_row)

        for suffix, img in transformations.items():
            # rotate(..., expand=True) already resizes the canvas for 90-degree
            # multiples, so no black borders appear; for r90/r270 the image is
            # h x w and this crop is a no-op kept from the original script
            if suffix in ['r90', 'r270']:
                img = img.crop((0, 0, h, w))
            new_image_path = os.path.splitext(image_path)[0] + f'_{suffix}.JPG'
            new_full_path = os.path.join(root_dir, new_image_path)
            img.save(new_full_path)
            new_row = row.copy()
            new_row['image_path'] = new_image_path
            augmented_rows.append(new_row)
    return augmented_rows


if __name__ == '__main__':
    # Guard the Pool so the script also works on spawn-based platforms;
    # process images in parallel, one worker per CPU core
    num_processes = cpu_count()
    pool = Pool(processes=num_processes)

    # Show progress with tqdm
    augmented_data = []
    tasks = [(row, root_dir) for _, row in poisonous_data.iterrows()]
    for augmented_rows in tqdm(pool.imap_unordered(augment_image, tasks),
                               total=len(poisonous_data)):
        augmented_data.extend(augmented_rows)

    # Shut down the pool
    pool.close()
    pool.join()

    # Append the augmented rows to new_data
    # (DataFrame.append was removed in pandas 2.0; use pd.concat instead)
    new_data = pd.concat([new_data, pd.DataFrame(augmented_data)],
                         ignore_index=True)

    # Save the combined metadata to a new CSV file
    new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv'
    new_data.to_csv(new_csv_path, index=False)
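    # Optional sanity check (a sketch, not part of the original script): each
    # poisonous image found on disk yields 4 crops + 5 rotations/flips, so the
    # output can grow by at most 9 rows per poisonous image; fewer means some
    # image files were missing.
    expected_max = len(data) + 9 * len(poisonous_data)
    print(f'{len(data)} original rows + {len(augmented_data)} augmented rows '
          f'= {len(new_data)} total (upper bound {expected_max})')
    assert len(new_data) <= expected_max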