# Image handling and model imports
import PIL.Image
import torch
from torchvision import transforms

import transformers
transformers.utils.move_cache()  # migrate any old-style transformers cache to the current layout

# Colorization backbone (Stable Diffusion XL + ControlNet)
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel
# For image captioning
from transformers import BlipProcessor, BlipForConditionalGeneration
from accelerate import Accelerator
def remove_unlikely_words(prompt: str) -> str:
    """
    Removes unlikely words from a prompt.

    Args:
        prompt: The text prompt to be cleaned.

    Returns:
        The cleaned prompt with unlikely words removed.
    """
    unlikely_words = []

    # Era phrases in their common forms: "1900s", "1900", "year 1900", "circa 1900", ...
    a1_list = [f'{i}s' for i in range(1900, 2000)]
    a2_list = [f'{i}' for i in range(1900, 2000)]
    a3_list = [f'year {i}' for i in range(1900, 2000)]
    a4_list = [f'circa {i}' for i in range(1900, 2000)]
    # The same phrases with the digits spaced out ("1 9 0 0 s", ...), as captions
    # sometimes render them.
    b1_list = [f"{year[0]} {year[1]} {year[2]} {year[3]} s" for year in a1_list]
    b2_list = [f"{year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    b3_list = [f"year {year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    b4_list = [f"circa {year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    # Phrases that describe monochrome/archival qualities rather than image content
    # (kept verbatim, including common misspellings seen in captions).
    words_list = [
        "black and white,", "black and white", "black & white,", "black & white", "circa",
        "balck and white,", "monochrome,", "black-and-white,", "black-and-white photography,",
        "black - and - white photography,", "monochrome bw,", "black white,", "black an white,",
        "grainy footage,", "grainy footage", "grainy photo,", "grainy photo", "b&w photo",
        "back and white", "back and white,", "monochrome contrast", "monochrome", "grainy",
        "grainy photograph,", "grainy photograph", "low contrast,", "low contrast", "b & w",
        "grainy black-and-white photo,", "bw", "bw,", "grainy black-and-white photo",
        "b & w,", "b&w,", "b&w!,", "b&w", "black - and - white,", "bw photo,", "grainy photo,",
        "black-and-white photo,", "black-and-white photo", "black - and - white photography",
        "b&w photo,", "monochromatic photo,", "grainy monochrome photo,", "monochromatic",
        "blurry photo,", "blurry,", "blurry photography,", "monochromatic photo",
        "black - and - white photograph,", "black - and - white photograph", "black on white,",
        "black on white", "black-and-white", "historical image,", "historical picture,",
        "historical photo,", "historical photograph,", "archival photo,", "taken in the early",
        "taken in the late", "taken in the", "historic photograph,", "restored,", "restored",
        "historical photo", "historical setting,",
        "historic photo,", "historic", "desaturated!!,", "desaturated!,", "desaturated,", "desaturated",
        "taken in", "shot on leica", "shot on leica sl2", "sl2",
        "taken with a leica camera", "taken with a leica camera", "leica sl2", "leica", "setting",
        "overcast day", "overcast weather", "slight overcast", "overcast",
        "picture taken in", "photo taken in",
        ", photo", ", photo", ", photo", ", photo", ", photograph",
        ",,", ",,,", ",,,,", " ,", " ,", " ,", " ,",
    ]
    unlikely_words.extend(a1_list)
    unlikely_words.extend(a2_list)
    unlikely_words.extend(a3_list)
    unlikely_words.extend(a4_list)
    unlikely_words.extend(b1_list)
    unlikely_words.extend(b2_list)
    unlikely_words.extend(b3_list)
    unlikely_words.extend(b4_list)
    unlikely_words.extend(words_list)

    # Remove each phrase via plain substring replacement, in list order.
    for word in unlikely_words:
        prompt = prompt.replace(word, "")
    return prompt
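
# Illustrative example (hypothetical caption): substring removal can leave stray
# commas and spaces behind, which the downstream prompt tolerates.
#
#   remove_unlikely_words("a photography of a man in a suit, 1950s, black and white")
#   # -> roughly "a photography of a man in a suit, "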
def blip_image_captioning(image, device, processor, generator, conditional="a photography of"):
    # Load the processor and model if the caller did not supply them
    if processor is None:
        processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-large"
        )
    if generator is None:
        generator = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-large",
            torch_dtype=torch.float16
        ).to(device)

    # Prepare inputs, casting the pixel values to float16 to match the model weights
    inputs = processor(
        image,
        text=conditional,
        return_tensors="pt"
    ).to(device, torch.float16)

    # Generate the caption (max_new_tokens bounds only the generated text)
    out = generator.generate(**inputs, max_new_tokens=20)
    caption = processor.decode(out[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    caption = remove_unlikely_words(caption)
    return caption
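
# Minimal usage sketch (assumes a CUDA device and a local "input.jpg"; pass None
# for processor/generator to let the function load BLIP itself):
#
#   image = PIL.Image.open("input.jpg").convert("RGB")
#   caption = blip_image_captioning(image, "cuda", processor=None, generator=None)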
def apply_color(image: PIL.Image.Image, color_map: PIL.Image.Image) -> PIL.Image.Image:
    # Convert both images to LAB color space (via RGB, so a grayscale 'L' input,
    # which PIL cannot convert to 'LAB' directly, is handled as well)
    image_lab = image.convert('RGB').convert('LAB')
    color_map_lab = color_map.convert('RGB').convert('LAB')

    # Keep the input's lightness channel; take the a/b color channels from the color map
    l, _, _ = image_lab.split()
    _, a_map, b_map = color_map_lab.split()
    merged_lab = PIL.Image.merge('LAB', (l, a_map, b_map))

    # Convert the merged LAB image back to RGB color space
    result_rgb = merged_lab.convert('RGB')
    return result_rgb
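
# Minimal usage sketch (hypothetical file names): keep the luminance of the
# grayscale input and take the chrominance from a colorized output, e.g. one
# produced by the ControlNet pipeline imported above:
#
#   gray = PIL.Image.open("gray.png")
#   colorized = PIL.Image.open("colorized.png")
#   apply_color(gray, colorized).save("result.png")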