# Image handling and model imports
import PIL.Image
import torch
from torchvision import transforms

import transformers
transformers.utils.move_cache()  # migrate any old-style transformers cache to the current layout

# Colorization backbone (Stable Diffusion XL + ControlNet)
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel
# For image captioning
from transformers import BlipProcessor, BlipForConditionalGeneration
from accelerate import Accelerator
def remove_unlikely_words(prompt: str) -> str:
    """
    Removes unlikely words from a prompt.

    Args:
        prompt: The text prompt to be cleaned.

    Returns:
        The cleaned prompt with unlikely words removed.
    """
    unlikely_words = []

    # Era phrases in their common forms: "1900s", "1900", "year 1900", "circa 1900", ...
    a1_list = [f'{i}s' for i in range(1900, 2000)]
    a2_list = [f'{i}' for i in range(1900, 2000)]
    a3_list = [f'year {i}' for i in range(1900, 2000)]
    a4_list = [f'circa {i}' for i in range(1900, 2000)]
    # The same phrases with the digits spaced out ("1 9 0 0 s", ...), as captions
    # sometimes render them.
    b1_list = [f"{year[0]} {year[1]} {year[2]} {year[3]} s" for year in a1_list]
    b2_list = [f"{year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    b3_list = [f"year {year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    b4_list = [f"circa {year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    # Phrases that describe monochrome/archival qualities rather than image content
    # (kept verbatim, including common misspellings seen in captions).
    words_list = [
        "black and white,", "black and white", "black & white,", "black & white", "circa",
        "balck and white,", "monochrome,", "black-and-white,", "black-and-white photography,",
        "black - and - white photography,", "monochrome bw,", "black white,", "black an white,",
        "grainy footage,", "grainy footage", "grainy photo,", "grainy photo", "b&w photo",
        "back and white", "back and white,", "monochrome contrast", "monochrome", "grainy",
        "grainy photograph,", "grainy photograph", "low contrast,", "low contrast", "b & w",
        "grainy black-and-white photo,", "bw", "bw,", "grainy black-and-white photo",
        "b & w,", "b&w,", "b&w!,", "b&w", "black - and - white,", "bw photo,", "grainy photo,",
        "black-and-white photo,", "black-and-white photo", "black - and - white photography",
        "b&w photo,", "monochromatic photo,", "grainy monochrome photo,", "monochromatic",
        "blurry photo,", "blurry,", "blurry photography,", "monochromatic photo",
        "black - and - white photograph,", "black - and - white photograph", "black on white,",
        "black on white", "black-and-white", "historical image,", "historical picture,",
        "historical photo,", "historical photograph,", "archival photo,", "taken in the early",
        "taken in the late", "taken in the", "historic photograph,", "restored,", "restored",
        "historical photo", "historical setting,",
        "historic photo,", "historic", "desaturated!!,", "desaturated!,", "desaturated,", "desaturated",
        "taken in", "shot on leica", "shot on leica sl2", "sl2",
        "taken with a leica camera", "taken with a leica camera", "leica sl2", "leica", "setting",
        "overcast day", "overcast weather", "slight overcast", "overcast",
        "picture taken in", "photo taken in",
        ", photo", ", photo", ", photo", ", photo", ", photograph",
        ",,", ",,,", ",,,,", " ,", " ,", " ,", " ,",
    ]
    unlikely_words.extend(a1_list)
    unlikely_words.extend(a2_list)
    unlikely_words.extend(a3_list)
    unlikely_words.extend(a4_list)
    unlikely_words.extend(b1_list)
    unlikely_words.extend(b2_list)
    unlikely_words.extend(b3_list)
    unlikely_words.extend(b4_list)
    unlikely_words.extend(words_list)

    # Remove each phrase via plain substring replacement, in list order.
    for word in unlikely_words:
        prompt = prompt.replace(word, "")
    return prompt
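
# Illustrative example (hypothetical caption): substring removal can leave stray
# commas and spaces behind, which the downstream prompt tolerates.
#
#   remove_unlikely_words("a photography of a man in a suit, 1950s, black and white")
#   # -> roughly "a photography of a man in a suit, "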
def blip_image_captioning(image, device, processor, generator, conditional="a photography of"):
    # Load the processor and model if the caller did not supply them
    if processor is None:
        processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-large"
        )
    if generator is None:
        generator = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-large",
            torch_dtype=torch.float16
        ).to(device)

    # Prepare inputs, casting the pixel values to float16 to match the model weights
    inputs = processor(
        image,
        text=conditional,
        return_tensors="pt"
    ).to(device, torch.float16)

    # Generate the caption (max_new_tokens bounds only the generated text)
    out = generator.generate(**inputs, max_new_tokens=20)
    caption = processor.decode(out[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    caption = remove_unlikely_words(caption)
    return caption
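
# Minimal usage sketch (assumes a CUDA device and a local "input.jpg"; pass None
# for processor/generator to let the function load BLIP itself):
#
#   image = PIL.Image.open("input.jpg").convert("RGB")
#   caption = blip_image_captioning(image, "cuda", processor=None, generator=None)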
def apply_color(image: PIL.Image.Image, color_map: PIL.Image.Image) -> PIL.Image.Image:
    # Convert both images to LAB color space (via RGB, so a grayscale 'L' input,
    # which PIL cannot convert to 'LAB' directly, is handled as well)
    image_lab = image.convert('RGB').convert('LAB')
    color_map_lab = color_map.convert('RGB').convert('LAB')

    # Keep the input's lightness channel; take the a/b color channels from the color map
    l, _, _ = image_lab.split()
    _, a_map, b_map = color_map_lab.split()
    merged_lab = PIL.Image.merge('LAB', (l, a_map, b_map))

    # Convert the merged LAB image back to RGB color space
    result_rgb = merged_lab.convert('RGB')
    return result_rgb
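
# Minimal usage sketch (hypothetical file names): keep the luminance of the
# grayscale input and take the chrominance from a colorized output, e.g. one
# produced by the ControlNet pipeline imported above:
#
#   gray = PIL.Image.open("gray.png")
#   colorized = PIL.Image.open("colorized.png")
#   apply_color(gray, colorized).save("result.png")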