Spaces:

fffiloni
/

VACE-Annotators

Sleeping

App Files Files Community

VACE-Annotators / vace /annotators /outpainting.py

fffiloni

Migrated from GitHub

c7de15e verified 4 months ago

raw

history blame contribute delete

11 kB

	# -*- coding: utf-8 -*-
	# Copyright (c) Alibaba, Inc. and its affiliates.
	import math
	import random
	from abc import ABCMeta

	import numpy as np
	import torch
	from PIL import Image, ImageDraw
	from .utils import convert_to_pil, get_mask_box


	class OutpaintingAnnotator:
	    """Build outpainting inputs: a padded (or masked) image plus its mask.

	    Without a ``mask`` argument the image is pasted onto a larger canvas; the
	    newly added border is white (255) in the mask and the kept region is
	    black (0). With a ``mask`` argument the canvas size is unchanged and the
	    mask is rebuilt from the bounding box reported by ``get_mask_box``.

	    Config keys read from ``cfg`` (default in parentheses):
	        MASK_BLUR (0): the kept rectangle is inset by ``2 * MASK_BLUR``
	            pixels on every side that received padding.
	        RANDOM_CFG (None): when truthy, ``direction`` and ``expand_ratio``
	            are sampled on every call from its ``DIRECTION_RANGE`` and
	            ``RATIO_RANGE`` entries, overriding the call arguments.
	        RETURN_MASK (False) / RETURN_SOURCE (True): default output flags.
	        KEEP_PADDING_RATIO (8): the padded canvas size is rounded up to a
	            multiple of this value.
	        MASK_COLOR (0): fill colour of the padded canvas.
	    """

	    def __init__(self, cfg, device=None):
	        # `device` is accepted for signature compatibility; it is not used.
	        self.mask_blur = cfg.get('MASK_BLUR', 0)
	        self.random_cfg = cfg.get('RANDOM_CFG', None)
	        self.return_mask = cfg.get('RETURN_MASK', False)
	        self.return_source = cfg.get('RETURN_SOURCE', True)
	        self.keep_padding_ratio = cfg.get('KEEP_PADDING_RATIO', 8)
	        self.mask_color = cfg.get('MASK_COLOR', 0)

	    def _pad_canvas(self, image, expand_ratio, direction, mask_color):
	        """Return ``(padded_image, mask)`` for ``image`` grown along ``direction``."""
	        src_width, src_height = image.width, image.height
	        grow_x = int(expand_ratio * src_width)
	        grow_y = int(expand_ratio * src_height)
	        left = grow_x if 'left' in direction else 0
	        right = grow_x if 'right' in direction else 0
	        up = grow_y if 'up' in direction else 0
	        down = grow_y if 'down' in direction else 0

	        # Round the canvas up to a multiple of keep_padding_ratio.
	        align = self.keep_padding_ratio
	        tar_width = math.ceil((src_width + left + right) / align) * align
	        tar_height = math.ceil((src_height + up + down) / align) * align

	        # Spread the real padding (requested + rounding) over both sides in
	        # the requested proportion. The trailing side always takes what is
	        # left, so rounding-only filler (no expansion requested on that
	        # axis) is still flagged in the mask instead of being marked as
	        # original content.
	        pad_x = tar_width - src_width
	        pad_y = tar_height - src_height
	        if left > 0:
	            left = left * pad_x // (left + right)
	        right = pad_x - left
	        if up > 0:
	            up = up * pad_y // (up + down)
	        down = pad_y - up

	        if mask_color is not None:
	            img = Image.new('RGB', (tar_width, tar_height), color=mask_color)
	        else:
	            img = Image.new('RGB', (tar_width, tar_height))
	        img.paste(image, (left, up))

	        # White = area to generate, black = area to keep.
	        mask = Image.new('L', (img.width, img.height), 'white')
	        inset = self.mask_blur * 2
	        # ImageDraw.rectangle includes both corners, hence the "- 1".
	        ImageDraw.Draw(mask).rectangle(
	            (left + (inset if left > 0 else 0),
	             up + (inset if up > 0 else 0),
	             mask.width - right - (inset if right > 0 else 0) - 1,
	             mask.height - down - (inset if down > 0 else 0) - 1),
	            fill='black')
	        return img, mask

	    def forward(self,
	                image,
	                expand_ratio=0.3,
	                mask=None,
	                direction=('left', 'right', 'up', 'down'),
	                return_mask=None,
	                return_source=None,
	                mask_color=None):
	        """Annotate one image for outpainting.

	        Args:
	            image: anything accepted by ``convert_to_pil``.
	            expand_ratio: padding per selected side, as a fraction of the
	                source width/height.
	            mask: optional mask; when given, no padding is added and the
	                mask is replaced by its bounding-box rectangle.
	            direction: sides to expand; any container supporting ``in``
	                with the values 'left', 'right', 'up', 'down'.
	            return_mask / return_source / mask_color: per-call overrides;
	                ``None`` falls back to the configured value.

	        Returns:
	            ``np.ndarray`` of the annotated image when neither flag is set,
	            otherwise a dict with 'image' plus 'src_image' and/or 'mask'.
	        """
	        return_mask = self.return_mask if return_mask is None else return_mask
	        return_source = self.return_source if return_source is None else return_source
	        mask_color = self.mask_color if mask_color is None else mask_color
	        image = convert_to_pil(image)

	        if self.random_cfg:
	            direction_range = self.random_cfg.get(
	                'DIRECTION_RANGE', ['left', 'right', 'up', 'down'])
	            ratio_range = self.random_cfg.get('RATIO_RANGE', [0.0, 1.0])
	            # Pick between 1 and all of the allowed sides.
	            direction = random.sample(
	                direction_range,
	                random.choice(range(1, len(direction_range) + 1)))
	            expand_ratio = random.uniform(ratio_range[0], ratio_range[1])

	        if mask is None:
	            init_image = image
	            img, mask = self._pad_canvas(image, expand_ratio, direction,
	                                         mask_color)
	        else:
	            # bbox is used below as (left, upper, right, lower).
	            bbox = get_mask_box(np.array(mask))
	            img = image
	            if bbox is None:
	                # No box reported: pass image and mask through untouched.
	                init_image = image
	            else:
	                mask = Image.new('L', (image.width, image.height), 'white')
	                mask_zero = Image.new(
	                    'L', (bbox[2] - bbox[0], bbox[3] - bbox[1]), 'black')
	                mask.paste(mask_zero, (bbox[0], bbox[1]))
	                # NOTE(review): here 'src_image' is the blacked-out crop and
	                # 'image' the untouched input, the reverse of the padding
	                # branch -- confirm this is intended.
	                init_image = Image.new('RGB', (image.width, image.height),
	                                       'black')
	                init_image.paste(image.crop(bbox), (bbox[0], bbox[1]))

	        if not return_mask and not return_source:
	            return np.array(img)
	        ret_data = {}
	        if return_source:
	            ret_data['src_image'] = np.array(init_image)
	        ret_data['image'] = np.array(img)
	        if return_mask:
	            ret_data['mask'] = np.array(mask)
	        return ret_data



	class OutpaintingInnerAnnotator:
	    """Simulate outpainting inside the original frame size.

	    A border is cut away from the image, the remaining centre is pasted back
	    at the same position on a canvas of the original size, and the removed
	    border is white (255) in the mask while the kept centre is black (0).

	    Config keys read from ``cfg`` (default in parentheses):
	        MASK_BLUR (0): the kept rectangle is inset by ``2 * MASK_BLUR``
	            pixels on every side that was cut.
	        RANDOM_CFG (None): when truthy, ``direction`` and ``expand_ratio``
	            are sampled on every call from its ``DIRECTION_RANGE`` and
	            ``RATIO_RANGE`` entries, overriding the call arguments.
	        RETURN_MASK (False) / RETURN_SOURCE (True): default output flags.
	        KEEP_PADDING_RATIO (8): stored but not used by this class.
	        MASK_COLOR (0): fill colour of the canvas behind the kept centre.
	    """

	    def __init__(self, cfg, device=None):
	        # `device` is accepted for signature compatibility; it is not used.
	        self.mask_blur = cfg.get('MASK_BLUR', 0)
	        self.random_cfg = cfg.get('RANDOM_CFG', None)
	        self.return_mask = cfg.get('RETURN_MASK', False)
	        self.return_source = cfg.get('RETURN_SOURCE', True)
	        self.keep_padding_ratio = cfg.get('KEEP_PADDING_RATIO', 8)
	        self.mask_color = cfg.get('MASK_COLOR', 0)

	    def forward(self,
	                image,
	                expand_ratio=0.3,
	                direction=('left', 'right', 'up', 'down'),
	                return_mask=None,
	                return_source=None,
	                mask_color=None):
	        """Annotate one image for inner outpainting.

	        Args:
	            image: anything accepted by ``convert_to_pil``.
	            expand_ratio: width of the border removed per selected side, as
	                a fraction of the source width/height.
	            direction: sides to cut; any container supporting ``in`` with
	                the values 'left', 'right', 'up', 'down'.
	            return_mask / return_source / mask_color: per-call overrides;
	                ``None`` falls back to the configured value.

	        Returns:
	            ``np.ndarray`` of the annotated image when neither flag is set,
	            otherwise a dict with 'image' plus 'src_image' and/or 'mask'.
	        """
	        return_mask = self.return_mask if return_mask is None else return_mask
	        return_source = self.return_source if return_source is None else return_source
	        mask_color = self.mask_color if mask_color is None else mask_color
	        image = convert_to_pil(image)

	        if self.random_cfg:
	            direction_range = self.random_cfg.get(
	                'DIRECTION_RANGE', ['left', 'right', 'up', 'down'])
	            ratio_range = self.random_cfg.get('RATIO_RANGE', [0.0, 1.0])
	            # Pick between 1 and all of the allowed sides.
	            direction = random.sample(
	                direction_range,
	                random.choice(range(1, len(direction_range) + 1)))
	            expand_ratio = random.uniform(ratio_range[0], ratio_range[1])

	        init_image = image
	        src_width, src_height = init_image.width, init_image.height
	        cut_x = int(expand_ratio * src_width)
	        cut_y = int(expand_ratio * src_height)
	        left = cut_x if 'left' in direction else 0
	        right = cut_x if 'right' in direction else 0
	        up = cut_y if 'up' in direction else 0
	        down = cut_y if 'down' in direction else 0

	        # Keep only the centre; the removed border is what gets outpainted.
	        cropped_image = init_image.crop(
	            (left, up, src_width - right, src_height - down))
	        if mask_color is not None:
	            img = Image.new('RGB', (src_width, src_height), color=mask_color)
	        else:
	            img = Image.new('RGB', (src_width, src_height))
	        img.paste(cropped_image, (left, up))

	        # White = area to generate, black = area to keep.
	        mask = Image.new('L', (img.width, img.height), 'white')
	        inset = self.mask_blur * 2
	        x0 = left + (inset if left > 0 else 0)
	        y0 = up + (inset if up > 0 else 0)
	        # ImageDraw.rectangle includes both corners, so the last kept pixel
	        # is at "start + size - 1"; without the "- 1" one filler column/row
	        # past the pasted crop would be marked as kept.
	        x1 = left + cropped_image.width - (inset if right > 0 else 0) - 1
	        y1 = up + cropped_image.height - (inset if down > 0 else 0) - 1
	        # Nothing left to keep (empty crop, or the inset consumed it):
	        # leave the mask fully white.
	        if x1 >= x0 and y1 >= y0:
	            ImageDraw.Draw(mask).rectangle((x0, y0, x1, y1), fill='black')

	        if not return_mask and not return_source:
	            return np.array(img)
	        ret_data = {}
	        if return_source:
	            ret_data['src_image'] = np.array(init_image)
	        ret_data['image'] = np.array(img)
	        if return_mask:
	            ret_data['mask'] = np.array(mask)
	        return ret_data





	class OutpaintingVideoAnnotator(OutpaintingAnnotator):
	    """Run ``OutpaintingAnnotator.forward`` on each frame and batch the results."""

	    def __init__(self, cfg, device=None):
	        super().__init__(cfg, device)
	        # Per-frame result key -> key used in the batched output.
	        self.key_map = {
	            "src_image": "src_images",
	            "image": "frames",
	            "mask": "masks"
	        }

	    def forward(self, frames,
	                expand_ratio=0.3,
	                mask=None,
	                direction=['left', 'right', 'up', 'down'],
	                return_mask=None,
	                return_source=None,
	                mask_color=None):
	        """Annotate every frame with the same arguments.

	        The same ``mask`` is passed for every frame.

	        Returns:
	            ``None`` when ``frames`` yields nothing; a list of per-frame
	            results when the parent returns plain arrays; otherwise a dict
	            of lists keyed by the renamed keys in ``self.key_map``.

	        NOTE(review): when RANDOM_CFG is set, the parent re-samples
	        direction and ratio on every call, so frames of one clip may be
	        padded differently -- confirm this is intended.
	        """
	        collected = None
	        for frame in frames:
	            result = super().forward(
	                frame,
	                expand_ratio=expand_ratio,
	                mask=mask,
	                direction=direction,
	                return_mask=return_mask,
	                return_source=return_source,
	                mask_color=mask_color)
	            if not isinstance(result, dict):
	                # Plain array result: accumulate in a flat list.
	                if collected is None:
	                    collected = []
	                collected.append(result)
	                continue
	            if collected is None:
	                collected = {}
	            for name, value in result.items():
	                collected.setdefault(self.key_map[name], []).append(value)
	        return collected


	class OutpaintingInnerVideoAnnotator(OutpaintingInnerAnnotator):
	    """Run ``OutpaintingInnerAnnotator.forward`` on each frame and batch the results."""

	    def __init__(self, cfg, device=None):
	        super().__init__(cfg, device)
	        # Per-frame result key -> key used in the batched output.
	        self.key_map = {
	            "src_image": "src_images",
	            "image": "frames",
	            "mask": "masks"
	        }

	    def forward(self, frames,
	                expand_ratio=0.3,
	                direction=['left', 'right', 'up', 'down'],
	                return_mask=None,
	                return_source=None,
	                mask_color=None):
	        """Annotate every frame with the same arguments.

	        Returns:
	            ``None`` when ``frames`` yields nothing; a list of per-frame
	            results when the parent returns plain arrays; otherwise a dict
	            of lists keyed by the renamed keys in ``self.key_map``.

	        NOTE(review): when RANDOM_CFG is set, the parent re-samples
	        direction and ratio on every call, so frames of one clip may be
	        cut differently -- confirm this is intended.
	        """
	        collected = None
	        for frame in frames:
	            result = super().forward(
	                frame,
	                expand_ratio=expand_ratio,
	                direction=direction,
	                return_mask=return_mask,
	                return_source=return_source,
	                mask_color=mask_color)
	            if not isinstance(result, dict):
	                # Plain array result: accumulate in a flat list.
	                if collected is None:
	                    collected = []
	                collected.append(result)
	                continue
	            if collected is None:
	                collected = {}
	            for name, value in result.items():
	                collected.setdefault(self.key_map[name], []).append(value)
	        return collected