|
import re |
|
from torchvision import transforms |
|
from torchvision.transforms.functional import InterpolationMode |
|
|
|
|
|
class BaseProcessor:
    """Base image processor holding a CLIP-style normalization transform.

    Subclasses compose ``self.normalize`` into their own pipelines.
    """

    def __init__(self, mean=None, std=None):
        """Build the per-channel normalization transform.

        Args:
            mean: Per-channel means; defaults to the CLIP ImageNet statistics.
            std: Per-channel stds; defaults to the CLIP ImageNet statistics.
        """
        # Default statistics are the ones CLIP was trained with.
        self.normalize = transforms.Normalize(
            mean if mean is not None else (0.48145466, 0.4578275, 0.40821073),
            std if std is not None else (0.26862954, 0.26130258, 0.27577711),
        )
|
|
|
|
|
class ImageTrainProcessor(BaseProcessor):
    """Training-time image processor: random-resized crop -> tensor -> normalize.

    Fix: the original accepted ``min_scale``/``max_scale`` but never used them —
    it always performed a deterministic resize, identical to the eval processor.
    They are now passed to ``RandomResizedCrop``, the train-time augmentation
    these parameters exist for (same construction and defaults as LAVIS's
    BlipImageTrainProcessor).
    """

    def __init__(self, image_size=224, mean=None, std=None, min_scale=0.5, max_scale=1.0):
        """
        Args:
            image_size: Output side length of the square crop.
            mean: Per-channel means for normalization (CLIP defaults if None).
            std: Per-channel stds for normalization (CLIP defaults if None).
            min_scale: Lower bound of the crop area fraction.
            max_scale: Upper bound of the crop area fraction.
        """
        super().__init__(mean=mean, std=std)

        self.transform = transforms.Compose(
            [
                # Crop a random region covering min_scale..max_scale of the
                # source area, then resize it to image_size x image_size.
                transforms.RandomResizedCrop(
                    image_size,
                    scale=(min_scale, max_scale),
                    interpolation=InterpolationMode.BICUBIC,
                ),
                transforms.ToTensor(),
                self.normalize,
            ]
        )

    def preprocess(self, item, return_tensors):
        """Transform one image; return_tensors is accepted for HF-style
        interface compatibility but is not used here."""
        return {'pixel_values': [self.transform(item)]}
|
|
|
|
|
class ImageEvalProcessor(BaseProcessor):
    """Deterministic evaluation-time image processor: resize -> tensor -> normalize."""

    def __init__(self, image_size=224, mean=None, std=None):
        """
        Args:
            image_size: Output side length of the square resize.
            mean: Per-channel means for normalization (CLIP defaults if None).
            std: Per-channel stds for normalization (CLIP defaults if None).
        """
        super().__init__(mean=mean, std=std)

        pipeline = [
            transforms.Resize(
                (image_size, image_size),
                interpolation=InterpolationMode.BICUBIC,
            ),
            transforms.ToTensor(),
            self.normalize,
        ]
        self.transform = transforms.Compose(pipeline)

    def preprocess(self, item, return_tensors):
        """Transform one image; return_tensors is accepted for HF-style
        interface compatibility but is not used here."""
        return {'pixel_values': [self.transform(item)]}
|
|
|
|
|
class QWenImageProcessor(BaseProcessor):
    """Image processor for Qwen-VL-style models (448x448 BICUBIC by default).

    Fixes vs. the original:
      * ``image_size`` was accepted but ignored — the resize was hard-coded to
        448. The default is now 448 (so the default output is byte-identical)
        and the parameter is actually honored when callers pass one.
      * The CLIP mean/std tuples and a second ``Normalize`` were duplicated
        locally, shadowing the identical ``self.normalize`` already built by
        ``BaseProcessor.__init__``; the duplication is removed.
    """

    def __init__(self, image_size=448, mean=None, std=None):
        """
        Args:
            image_size: Output side length of the square resize (Qwen-VL uses 448).
            mean: Per-channel means for normalization (CLIP defaults if None).
            std: Per-channel stds for normalization (CLIP defaults if None).
        """
        super().__init__(mean=mean, std=std)

        self.transform = transforms.Compose([
            transforms.Resize(
                (image_size, image_size),
                interpolation=InterpolationMode.BICUBIC,
            ),
            transforms.ToTensor(),
            self.normalize,
        ])

    def preprocess(self, item, return_tensors):
        """Transform one image; return_tensors is accepted for HF-style
        interface compatibility but is not used here."""
        return {'pixel_values': [self.transform(item)]}