Spaces:

fffiloni
/

Open-Sora-Plan-v1-0-0

App Files Files Community

Open-Sora-Plan-v1-0-0 / opensora /models /frame_interpolation /utils /utils.py

fffiloni

Upload 244 files

b3f324b verified 7 months ago

raw

history blame contribute delete

8.26 kB

	import re
	import sys
	import torch
	import random
	import numpy as np
	from PIL import ImageFile
	import torch.nn.functional as F
	from imageio import imread, imwrite
	ImageFile.LOAD_TRUNCATED_IMAGES = True


	class AverageMeter():
	def __init__(self):
	self.reset()

	def reset(self):
	self.val = 0.
	self.avg = 0.
	self.sum = 0.
	self.count = 0

	def update(self, val, n=1):
	self.val = val
	self.sum += val * n
	self.count += n
	self.avg = self.sum / self.count


	class AverageMeterGroups:
	def __init__(self) -> None:
	self.meter_dict = dict()

	def update(self, dict, n=1):
	for name, val in dict.items():
	if self.meter_dict.get(name) is None:
	self.meter_dict[name] = AverageMeter()
	self.meter_dict[name].update(val, n)

	def reset(self, name=None):
	if name is None:
	for v in self.meter_dict.values():
	v.reset()
	else:
	meter = self.meter_dict.get(name)
	if meter is not None:
	meter.reset()

	def avg(self, name):
	meter = self.meter_dict.get(name)
	if meter is not None:
	return meter.avg


	class InputPadder:
	""" Pads images such that dimensions are divisible by divisor """
	def __init__(self, dims, divisor=16):
	self.ht, self.wd = dims[-2:]
	pad_ht = (((self.ht // divisor) + 1) * divisor - self.ht) % divisor
	pad_wd = (((self.wd // divisor) + 1) * divisor - self.wd) % divisor
	self._pad = [pad_wd//2, pad_wd - pad_wd//2, pad_ht//2, pad_ht - pad_ht//2]

	def pad(self, *inputs):
	if len(inputs) == 1:
	return F.pad(inputs[0], self._pad, mode='replicate')
	else:
	return [F.pad(x, self._pad, mode='replicate') for x in inputs]

	def unpad(self, *inputs):
	if len(inputs) == 1:
	return self._unpad(inputs[0])
	else:
	return [self._unpad(x) for x in inputs]

	def _unpad(self, x):
	ht, wd = x.shape[-2:]
	c = [self._pad[2], ht-self._pad[3], self._pad[0], wd-self._pad[1]]
	return x[..., c[0]:c[1], c[2]:c[3]]


	def img2tensor(img):
	if img.shape[-1] > 3:
	img = img[:,:,:3]
	return torch.tensor(img).permute(2, 0, 1).unsqueeze(0) / 255.0


	def tensor2img(img_t):
	return (img_t * 255.).detach(
	).squeeze(0).permute(1, 2, 0).cpu().numpy(
	).clip(0, 255).astype(np.uint8)

	def seed_all(seed):
	random.seed(seed)
	np.random.seed(seed)
	torch.manual_seed(seed)
	torch.cuda.manual_seed_all(seed)


	def read(file):
	if file.endswith('.float3'): return readFloat(file)
	elif file.endswith('.flo'): return readFlow(file)
	elif file.endswith('.ppm'): return readImage(file)
	elif file.endswith('.pgm'): return readImage(file)
	elif file.endswith('.png'): return readImage(file)
	elif file.endswith('.jpg'): return readImage(file)
	elif file.endswith('.pfm'): return readPFM(file)[0]
	else: raise Exception('don\'t know how to read %s' % file)


	def write(file, data):
	if file.endswith('.float3'): return writeFloat(file, data)
	elif file.endswith('.flo'): return writeFlow(file, data)
	elif file.endswith('.ppm'): return writeImage(file, data)
	elif file.endswith('.pgm'): return writeImage(file, data)
	elif file.endswith('.png'): return writeImage(file, data)
	elif file.endswith('.jpg'): return writeImage(file, data)
	elif file.endswith('.pfm'): return writePFM(file, data)
	else: raise Exception('don\'t know how to write %s' % file)


	def readPFM(file):
	file = open(file, 'rb')

	color = None
	width = None
	height = None
	scale = None
	endian = None

	header = file.readline().rstrip()
	if header.decode("ascii") == 'PF':
	color = True
	elif header.decode("ascii") == 'Pf':
	color = False
	else:
	raise Exception('Not a PFM file.')

	dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline().decode("ascii"))
	if dim_match:
	width, height = list(map(int, dim_match.groups()))
	else:
	raise Exception('Malformed PFM header.')

	scale = float(file.readline().decode("ascii").rstrip())
	if scale < 0:
	endian = '<'
	scale = -scale
	else:
	endian = '>'

	data = np.fromfile(file, endian + 'f')
	shape = (height, width, 3) if color else (height, width)

	data = np.reshape(data, shape)
	data = np.flipud(data)
	return data, scale


	def writePFM(file, image, scale=1):
	file = open(file, 'wb')

	color = None

	if image.dtype.name != 'float32':
	raise Exception('Image dtype must be float32.')

	image = np.flipud(image)

	if len(image.shape) == 3 and image.shape[2] == 3:
	color = True
	elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1:
	color = False
	else:
	raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')

	file.write('PF\n' if color else 'Pf\n'.encode())
	file.write('%d %d\n'.encode() % (image.shape[1], image.shape[0]))

	endian = image.dtype.byteorder

	if endian == '<' or endian == '=' and sys.byteorder == 'little':
	scale = -scale

	file.write('%f\n'.encode() % scale)

	image.tofile(file)


	def readFlow(name):
	if name.endswith('.pfm') or name.endswith('.PFM'):
	return readPFM(name)[0][:,:,0:2]

	f = open(name, 'rb')

	header = f.read(4)
	if header.decode("utf-8") != 'PIEH':
	raise Exception('Flow file header does not contain PIEH')

	width = np.fromfile(f, np.int32, 1).squeeze()
	height = np.fromfile(f, np.int32, 1).squeeze()

	flow = np.fromfile(f, np.float32, width * height * 2).reshape((height, width, 2))

	return flow.astype(np.float32)


	def readImage(name):
	if name.endswith('.pfm') or name.endswith('.PFM'):
	data = readPFM(name)[0]
	if len(data.shape)==3:
	return data[:,:,0:3]
	else:
	return data
	return imread(name)


	def writeImage(name, data):
	if name.endswith('.pfm') or name.endswith('.PFM'):
	return writePFM(name, data, 1)
	return imwrite(name, data)


	def writeFlow(name, flow):
	f = open(name, 'wb')
	f.write('PIEH'.encode('utf-8'))
	np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f)
	flow = flow.astype(np.float32)
	flow.tofile(f)


	def readFloat(name):
	f = open(name, 'rb')

	if(f.readline().decode("utf-8")) != 'float\n':
	raise Exception('float file %s did not contain <float> keyword' % name)

	dim = int(f.readline())

	dims = []
	count = 1
	for i in range(0, dim):
	d = int(f.readline())
	dims.append(d)
	count *= d

	dims = list(reversed(dims))

	data = np.fromfile(f, np.float32, count).reshape(dims)
	if dim > 2:
	data = np.transpose(data, (2, 1, 0))
	data = np.transpose(data, (1, 0, 2))

	return data


	def writeFloat(name, data):
	f = open(name, 'wb')

	dim=len(data.shape)
	if dim>3:
	raise Exception('bad float file dimension: %d' % dim)

	f.write(('float\n').encode('ascii'))
	f.write(('%d\n' % dim).encode('ascii'))

	if dim == 1:
	f.write(('%d\n' % data.shape[0]).encode('ascii'))
	else:
	f.write(('%d\n' % data.shape[1]).encode('ascii'))
	f.write(('%d\n' % data.shape[0]).encode('ascii'))
	for i in range(2, dim):
	f.write(('%d\n' % data.shape[i]).encode('ascii'))

	data = data.astype(np.float32)
	if dim==2:
	data.tofile(f)

	else:
	np.transpose(data, (2, 0, 1)).tofile(f)


	def check_dim_and_resize(tensor_list):
	shape_list = []
	for t in tensor_list:
	shape_list.append(t.shape[2:])

	if len(set(shape_list)) > 1:
	desired_shape = shape_list[0]
	print(f'Inconsistent size of input video frames. All frames will be resized to {desired_shape}')

	resize_tensor_list = []
	for t in tensor_list:
	resize_tensor_list.append(torch.nn.functional.interpolate(t, size=tuple(desired_shape), mode='bilinear'))

	tensor_list = resize_tensor_list

	return tensor_list