# import os | |
# import pandas as pd | |
# from PIL import Image, ImageOps | |
# import numpy as np | |
# from tqdm import tqdm | |
# from multiprocessing import Pool, cpu_count | |
# # 读取CSV文件 | |
# csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv' | |
# data = pd.read_csv(csv_path) | |
# # 设置根目录 | |
# root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300' | |
# # 过滤poisonous为1的数据 | |
# poisonous_data = data[data['poisonous'] == 1] | |
# # 创建保存增强数据的DataFrame,并包含原始数据 | |
# new_data = data.copy() | |
# # 定义数据增强函数 | |
# def augment_image(args): | |
# row, root_dir = args | |
# image_path = row['image_path'] | |
# full_path = os.path.join(root_dir, image_path) | |
# augmented_rows = [] | |
# if os.path.exists(full_path): | |
# image = Image.open(full_path) | |
# w, h = image.size | |
# # 定义旋转和翻转操作 | |
# transformations = { | |
# 'r90': image.rotate(90, expand=True), | |
# 'r180': image.rotate(180, expand=True), | |
# 'r270': image.rotate(270, expand=True), | |
# 'fh': ImageOps.mirror(image), | |
# 'fv': ImageOps.flip(image), | |
# } | |
# for suffix, img in transformations.items(): | |
# # 裁剪图片以去除旋转后的黑边 | |
# if suffix in ['r90', 'r270']: | |
# img = img.crop((0, 0, h, w)) | |
# new_image_path = os.path.splitext(image_path)[0] + f'_{suffix}.JPG' | |
# new_full_path = os.path.join(root_dir, new_image_path) | |
# img.save(new_full_path) | |
# new_row = row.copy() | |
# new_row['image_path'] = new_image_path | |
# augmented_rows.append(new_row) | |
# return augmented_rows | |
# # 准备多进程处理 | |
# num_processes = cpu_count() | |
# pool = Pool(processes=num_processes) | |
# # 使用tqdm显示进度 | |
# augmented_data = [] | |
# for augmented_rows in tqdm(pool.imap_unordered(augment_image, [(row, root_dir) for _, row in poisonous_data.iterrows()]), total=len(poisonous_data)): | |
# augmented_data.extend(augmented_rows) | |
# # 关闭进程池 | |
# pool.close() | |
# pool.join() | |
# # 将增强后的数据添加到new_data中 | |
# new_data = new_data.append(augmented_data, ignore_index=True) | |
# # 将数据保存到新的CSV文件中 | |
# new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv' | |
# new_data.to_csv(new_csv_path, index=False) | |
import os | |
import pandas as pd | |
from PIL import Image, ImageOps | |
import numpy as np | |
from tqdm import tqdm | |
from multiprocessing import Pool, cpu_count | |
import random | |
# Load the training/validation metadata table.
csv_path = '/data/cjm/FungiCLEF2024/Dataset/06_new_train_valmetadata.csv'
data = pd.read_csv(csv_path)

# Root directory holding the image files that 'image_path' refers to.
root_dir = '/data/cjm/FungiCLEF2024/Dataset/DF20_21_300'

# Only rows flagged as poisonous (poisonous == 1) are augmented.
poisonous_data = data.loc[data['poisonous'] == 1]

# The output starts as a copy of the full original metadata; the
# augmented rows are appended to it later.
new_data = data.copy()

# Data-augmentation worker:
def augment_image(args):
    """Write augmented copies of one image and return their metadata rows.

    Produces 4 random crops (70-80% of the original size) plus five fixed
    transforms (90/180/270-degree rotations, horizontal and vertical flips),
    saving each next to the source image with a suffixed filename.

    Args:
        args: Tuple of (row, root_dir). ``row`` is a metadata row (pandas
            Series or mapping) with an ``'image_path'`` entry relative to
            ``root_dir``.

    Returns:
        list: One copy of ``row`` per saved image, with ``'image_path'``
        pointing at the new file. Empty when the source image is missing.
    """
    row, root_dir = args
    image_path = row['image_path']
    full_path = os.path.join(root_dir, image_path)
    augmented_rows = []
    if not os.path.exists(full_path):
        # Nothing to do for rows whose image is absent on disk.
        return augmented_rows
    stem = os.path.splitext(image_path)[0]
    # 'with' guarantees the file handle is closed (the original leaked one
    # handle per processed image, across every worker process).
    with Image.open(full_path) as image:
        if image.mode not in ('RGB', 'L', 'CMYK'):
            # JPEG cannot encode palette/alpha modes (P, RGBA, LA, ...);
            # convert up front so .save() below cannot raise OSError.
            image = image.convert('RGB')
        w, h = image.size
        # Fixed rotation and flip transforms, keyed by filename suffix.
        transformations = {
            'r90': image.rotate(90, expand=True),
            'r180': image.rotate(180, expand=True),
            'r270': image.rotate(270, expand=True),
            'fh': ImageOps.mirror(image),
            'fv': ImageOps.flip(image),
        }
        # Random crops at 70-80% of the original size.
        for _ in range(4):
            rand = random.uniform(0.7, 0.8)
            new_w = int(w * rand)
            new_h = int(h * rand)
            left = random.randint(0, w - new_w)
            top = random.randint(0, h - new_h)
            cropped_image = image.crop((left, top, left + new_w, top + new_h))
            new_image_path = stem + f'_crop{rand}.JPG'
            new_full_path = os.path.join(root_dir, new_image_path)
            cropped_image.save(new_full_path)
            new_row = row.copy()
            new_row['image_path'] = new_image_path
            augmented_rows.append(new_row)
        for suffix, img in transformations.items():
            if suffix in ('r90', 'r270'):
                # Pillow's exact-90-degree rotation with expand=True already
                # yields an (h, w) image; this crop is a defensive no-op kept
                # from the original code.
                img = img.crop((0, 0, h, w))
            new_image_path = stem + f'_{suffix}.JPG'
            new_full_path = os.path.join(root_dir, new_image_path)
            img.save(new_full_path)
            new_row = row.copy()
            new_row['image_path'] = new_image_path
            augmented_rows.append(new_row)
    return augmented_rows
# Guard the pool setup so multiprocessing's 'spawn' start method (the
# default on Windows and macOS) does not re-execute it when worker
# processes import this module — without the guard that recurses/crashes.
if __name__ == '__main__':
    num_processes = cpu_count()
    augmented_data = []
    # Context manager terminates the pool even if a worker raises; all
    # results are consumed before exit, so no work is lost.
    with Pool(processes=num_processes) as pool:
        tasks = [(row, root_dir) for _, row in poisonous_data.iterrows()]
        # imap_unordered streams results back as workers finish, so tqdm
        # shows real progress instead of one jump at the end.
        for augmented_rows in tqdm(pool.imap_unordered(augment_image, tasks),
                                   total=len(poisonous_data)):
            augmented_data.extend(augmented_rows)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported way to add the augmented rows to the original metadata.
    new_data = pd.concat([new_data, pd.DataFrame(augmented_data)],
                         ignore_index=True)
    # Persist the combined (original + augmented) metadata.
    new_csv_path = '/data/cjm/FungiCLEF2024/Dataset/07_new_train_valmetadata.csv'
    new_data.to_csv(new_csv_path, index=False)