# for image captioning
import PIL
import PIL.Image  # import the submodule explicitly so PIL.Image is always available
import torch
from torchvision import transforms
import transformers

transformers.utils.move_cache()

from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel
from transformers import BlipProcessor, BlipForConditionalGeneration
from accelerate import Accelerator
def remove_unlikely_words(prompt: str) -> str:
    """
    Removes unlikely words from a prompt.

    Args:
        prompt: The text prompt to be cleaned.

    Returns:
        The cleaned prompt with unlikely words removed.
    """
    unlikely_words = []
    # Year mentions in several surface forms: "1950s", "1950", "year 1950", "circa 1950"
    a1_list = [f'{i}s' for i in range(1900, 2000)]
    a2_list = [f'{i}' for i in range(1900, 2000)]
    a3_list = [f'year {i}' for i in range(1900, 2000)]
    a4_list = [f'circa {i}' for i in range(1900, 2000)]
    # The same forms with the digits spaced out, as they sometimes appear in
    # decoded captions, e.g. "1 9 5 0 s"
    b1_list = [f"{year[0]} {year[1]} {year[2]} {year[3]} s" for year in a1_list]
    b2_list = [f"{year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    b3_list = [f"year {year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    b4_list = [f"circa {year[0]} {year[1]} {year[2]} {year[3]}" for year in a1_list]
    words_list = [
        "black and white,", "black and white", "black & white,", "black & white", "circa",
        "balck and white,", "monochrome,", "black-and-white,", "black-and-white photography,",
        "black - and - white photography,", "monochrome bw,", "black white,", "black an white,",
        "grainy footage,", "grainy footage", "grainy photo,", "grainy photo", "b&w photo",
        "back and white", "back and white,", "monochrome contrast", "monochrome", "grainy",
        "grainy photograph,", "grainy photograph", "low contrast,", "low contrast", "b & w",
        "grainy black-and-white photo,", "bw", "bw,", "grainy black-and-white photo",
        "b & w,", "b&w,", "b&w!,", "b&w", "black - and - white,", "bw photo,",
        "black-and-white photo,", "black-and-white photo", "black - and - white photography",
        "b&w photo,", "monochromatic photo,", "grainy monochrome photo,", "monochromatic",
        "blurry photo,", "blurry,", "blurry photography,", "monochromatic photo",
        "black - and - white photograph,", "black - and - white photograph", "black on white,",
        "black on white", "black-and-white", "historical image,", "historical picture,",
        "historical photo,", "historical photograph,", "archival photo,", "taken in the early",
        "taken in the late", "taken in the", "historic photograph,", "restored,", "restored",
        "historical photo", "historical setting,",
        "historic photo,", "historic", "desaturated!!,", "desaturated!,", "desaturated,", "desaturated",
        "taken in", "shot on leica", "shot on leica sl2", "sl2",
        "taken with a leica camera", "leica sl2", "leica", "setting",
        "overcast day", "overcast weather", "slight overcast", "overcast",
        "picture taken in", "photo taken in",
        # Longer patterns come before their substrings so replace() does not clobber
        # them; the repeated " ," entries act as extra cleanup passes for separators
        # that reappear after an earlier removal.
        ", photograph", ", photo",
        ",,,,", ",,,", ",,", " ,", " ,", " ,", " ,",
    ]
    unlikely_words.extend(a1_list)
    unlikely_words.extend(a2_list)
    unlikely_words.extend(a3_list)
    unlikely_words.extend(a4_list)
    unlikely_words.extend(b1_list)
    unlikely_words.extend(b2_list)
    unlikely_words.extend(b3_list)
    unlikely_words.extend(b4_list)
    unlikely_words.extend(words_list)

    for word in unlikely_words:
        prompt = prompt.replace(word, "")
    return prompt
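# Illustrative example (not from the original source): a BLIP caption such as
# "a photography of a city street, black and white, grainy" would have the
# monochrome/era tokens stripped, leaving roughly "a photography of a city street".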
def blip_image_captioning(image, device, processor, generator, conditional="a photography of"):
    # Load the processor and model if the caller did not pass them in
    if processor is None:
        processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-large"
        )
    if generator is None:
        generator = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-large",
            torch_dtype=torch.float16
        ).to(device)
    # Prepare inputs, casting floating tensors to float16 to match the model weights
    inputs = processor(
        image,
        text=conditional,
        return_tensors="pt"
    ).to(device, torch.float16)
    # Generate the caption; max_new_tokens bounds the caption length
    out = generator.generate(**inputs, max_new_tokens=20)
    caption = processor.decode(out[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    caption = remove_unlikely_words(caption)
    return caption
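# Minimal usage sketch, assuming a CUDA device (the model is loaded in float16)
# and an RGB PIL image; the variable names here are illustrative:
# device = "cuda"
# image = PIL.Image.open("photo.jpg").convert("RGB")
# caption = blip_image_captioning(image, device, processor=None, generator=None)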
def apply_color(image: PIL.Image.Image, color_map: PIL.Image.Image) -> PIL.Image.Image:
    # Convert both images to the LAB color space
    image_lab = image.convert('LAB')
    color_map_lab = color_map.convert('LAB')
    # Split LAB channels: keep luminance (L) from the input image,
    # take the chroma channels (a, b) from the color map
    l, _, _ = image_lab.split()
    _, a_map, b_map = color_map_lab.split()
    # Merge the original luminance with the color map's chroma channels
    merged_lab = PIL.Image.merge('LAB', (l, a_map, b_map))
    # Convert the merged LAB image back to RGB color space
    result_rgb = merged_lab.convert('RGB')
    return result_rgb
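# Hedged end-to-end sketch wiring the helpers above together. "input.jpg" and
# "color_map.png" are placeholder paths; in the full Space the color map would
# come from the ControlNet pipeline imported above, and a CUDA device is
# assumed for the float16 BLIP model.
if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    source = PIL.Image.open("input.jpg").convert("RGB")
    color_map = PIL.Image.open("color_map.png").convert("RGB")
    # Image.merge() requires matching sizes, so align the color map with the source
    color_map = color_map.resize(source.size)
    caption = blip_image_captioning(source, device, processor=None, generator=None)
    print("cleaned caption:", caption)
    apply_color(source, color_map).save("colorized.png")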