File size: 5,509 Bytes
863fd3a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# import os
# import pandas as pd
# from PIL import Image, ImageOps
# import numpy as np
# from tqdm import tqdm
# from multiprocessing import Pool, cpu_count
# # 读取CSV文件
# csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv'
# data = pd.read_csv(csv_path)
# # 设置根目录
# root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300'
# # 过滤poisonous为1的数据
# poisonous_data = data[data['poisonous'] == 1]
# # 创建保存增强数据的DataFrame,并包含原始数据
# new_data = data.copy()
# # 定义数据增强函数
# def augment_image(args):
# row, root_dir = args
# image_path = row['image_path']
# full_path = os.path.join(root_dir, image_path)
# augmented_rows = []
# if os.path.exists(full_path):
# image = Image.open(full_path)
# w, h = image.size
# # 定义旋转和翻转操作
# transformations = {
# 'r90': image.rotate(90, expand=True),
# 'r180': image.rotate(180, expand=True),
# 'r270': image.rotate(270, expand=True),
# 'fh': ImageOps.mirror(image),
# 'fv': ImageOps.flip(image),
# }
# for suffix, img in transformations.items():
# # 裁剪图片以去除旋转后的黑边
# if suffix in ['r90', 'r270']:
# img = img.crop((0, 0, h, w))
# new_image_path = os.path.splitext(image_path)[0] + f'_{suffix}.JPG'
# new_full_path = os.path.join(root_dir, new_image_path)
# img.save(new_full_path)
# new_row = row.copy()
# new_row['image_path'] = new_image_path
# augmented_rows.append(new_row)
# return augmented_rows
# # 准备多进程处理
# num_processes = cpu_count()
# pool = Pool(processes=num_processes)
# # 使用tqdm显示进度
# augmented_data = []
# for augmented_rows in tqdm(pool.imap_unordered(augment_image, [(row, root_dir) for _, row in poisonous_data.iterrows()]), total=len(poisonous_data)):
# augmented_data.extend(augmented_rows)
# # 关闭进程池
# pool.close()
# pool.join()
# # 将增强后的数据添加到new_data中
# new_data = new_data.append(augmented_data, ignore_index=True)
# # 将数据保存到新的CSV文件中
# new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv'
# new_data.to_csv(new_csv_path, index=False)
import os
import pandas as pd
from PIL import Image, ImageOps
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool, cpu_count
import random
# Root directory that the image_path values are joined onto
root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300'
# Load the metadata CSV
csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv'
data = pd.read_csv(csv_path)
# Keep only the rows whose poisonous column equals 1
poisonous_data = data.loc[data['poisonous'] == 1]
# Output table starts out as a copy of the original metadata
new_data = data.copy(deep=True)
# Define the data augmentation function
def augment_image(args):
    """Write augmented copies of one image and return their metadata rows.

    Args:
        args: A ``(row, root_dir)`` pair packed into one tuple so the
            function can be mapped over a process pool. ``row`` is a
            mapping-like record supporting ``row['image_path']``,
            ``row.copy()`` and item assignment; ``'image_path'`` is joined
            onto ``root_dir`` to locate the source image.

    Returns:
        A list with one copy of ``row`` per image written, each with
        ``'image_path'`` replaced by the new file's path relative to
        ``root_dir``. The four random crops come first, followed by the
        ``r90``, ``r180``, ``r270``, ``fh`` and ``fv`` variants. The list
        is empty when the source image does not exist.

    NOTE: the crop file names embed the random scale factor, so they differ
    from run to run.
    """
    row, root_dir = args
    image_path = row['image_path']
    full_path = os.path.join(root_dir, image_path)
    if not os.path.exists(full_path):
        return []

    stem = os.path.splitext(image_path)[0]
    augmented_rows = []

    def save_variant(img, suffix):
        # Save one augmented image next to the source and record its row.
        new_image_path = f'{stem}_{suffix}.JPG'
        img.save(os.path.join(root_dir, new_image_path))
        new_row = row.copy()
        new_row['image_path'] = new_image_path
        augmented_rows.append(new_row)

    # Modes the JPEG writer accepts directly; anything else (e.g. RGBA or
    # palette images) would make save() raise, so it is converted to RGB.
    jpeg_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')

    # The context manager closes the underlying file even if a save fails,
    # which matters in pool workers that process many images.
    with Image.open(full_path) as opened:
        source = opened if opened.mode in jpeg_modes else opened.convert('RGB')
        w, h = source.size

        # Random crops: a window covering 70-80% of each side, placed at a
        # random position inside the image.
        for _ in range(4):
            rand = random.uniform(0.7, 0.8)
            new_w = int(w * rand)
            new_h = int(h * rand)
            left = random.randint(0, w - new_w)
            top = random.randint(0, h - new_h)
            cropped_image = source.crop((left, top, left + new_w, top + new_h))
            save_variant(cropped_image, f'crop{rand}')

        # Rotations and flips are built one at a time so that only a single
        # transformed copy is held in memory at once.
        transformations = (
            ('r90', lambda: source.rotate(90, expand=True)),
            ('r180', lambda: source.rotate(180, expand=True)),
            ('r270', lambda: source.rotate(270, expand=True)),
            ('fh', lambda: ImageOps.mirror(source)),
            ('fv', lambda: ImageOps.flip(source)),
        )
        for suffix, build in transformations:
            img = build()
            if suffix in ('r90', 'r270'):
                # Clamp the quarter-turn result to an h-by-w canvas so no
                # padding introduced by the rotation is kept.
                img = img.crop((0, 0, h, w))
            save_variant(img, suffix)

    return augmented_rows
# Fan the poisonous rows out to a process pool, collect the augmented rows,
# and write the combined metadata to a new CSV.
# The __main__ guard keeps worker processes started with the "spawn" method
# from re-running this driver when they re-import the module.
if __name__ == '__main__':
    num_processes = cpu_count()
    tasks = [(row, root_dir) for _, row in poisonous_data.iterrows()]

    augmented_data = []
    # The with-block tears the pool down even if a worker raises.
    with Pool(processes=num_processes) as pool:
        # tqdm displays progress; results arrive in completion order.
        for augmented_rows in tqdm(pool.imap_unordered(augment_image, tasks), total=len(tasks)):
            augmented_data.extend(augmented_rows)
        # Shut the pool down gracefully once every result has been received.
        pool.close()
        pool.join()

    # Append the augmented rows to new_data. DataFrame.append was removed in
    # pandas 2.0, so the rows are turned into a frame and concatenated.
    if augmented_data:
        augmented_frame = pd.DataFrame(augmented_data, columns=new_data.columns)
        new_data = pd.concat([new_data, augmented_frame], ignore_index=True)

    # Save the combined table to a new CSV file.
    new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv'
    new_data.to_csv(new_csv_path, index=False)
|