import os
import json
import random

import pandas as pd
import torch
from PIL import Image
from tqdm import tqdm
from transformers import BlipProcessor, BlipForConditionalGeneration

def caption_images(image_paths, processor, model, folder):
    """Caption each image with BLIP, then adjust the caption based on the source folder."""
    image_captions_dict = []
    for img_path in tqdm(image_paths):
        pil_image = Image.open(img_path).convert("RGB")
        image_name = img_path.split("/")[-1]
        # Unconditional image captioning
        inputs = processor(pil_image, return_tensors="pt").to("cuda")
        out = model.generate(**inputs)
        out_caption = processor.decode(out[0], skip_special_tokens=True)
        if folder == "images/" and "thumbs up" in out_caption:
            out_caption = out_caption.replace("thumbs up", "#thumbsup")
        elif folder == "images/":
            # Caption lacks "thumbs up": attach the tag at a randomly chosen end
            th_choice = random.choice([True, False])
            out_caption = "#thumbsup " + out_caption if th_choice else out_caption + " #thumbsup"
        elif folder == "tom_cruise_dataset/":
            # Swap the generic subject word for the person token
            if "man" in out_caption:
                out_caption = out_caption.replace("man", "<tom_cruise>")
            elif "person" in out_caption:
                out_caption = out_caption.replace("person", "<tom_cruise>")
            else:
                out_caption = "<tom_cruise> " + out_caption
        # The model sometimes emits the spurious token "arafed" for a human subject
        if "arafed" in out_caption:
            out_caption = out_caption.replace("arafed ", "")
        image_captions_dict.append({"file_name": folder + image_name, "text": out_caption})
    return image_captions_dict
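
# Illustrative example of a single entry produced by caption_images (the file
# name and the exact caption are hypothetical; real captions depend on the
# BLIP model's output for each image):
#   {"file_name": "images/photo_01.jpg", "text": "#thumbsup a man standing in front of a building"}
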
def create_thumbs_up_person_dataset(path, cache_dir="/l/vision/v5/sragas/hf_models/"):
    random.seed(15)
    image_captions_dict = []

    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    model = BlipForConditionalGeneration.from_pretrained(
        "Salesforce/blip-image-captioning-large",
        cache_dir=cache_dir,
        torch_dtype=torch.float32,
    ).to("cuda")

    # Caption the thumbs-up images for prompts
    image_paths = [path + "images/" + file for file in os.listdir(path + "images/")]
    # Read from the person dataset
    person_paths = [path + "tom_cruise_dataset/" + file
                    for file in sorted(os.listdir(path + "tom_cruise_dataset/"))]

    # If the person is sachin, the prompts are the filenames; use the code below instead.
    # person_filenames = [filename.split("/")[-1] for filename in person_paths]
    # person_captions = [filename.split(".")[0].replace("1", "<sachin>") for filename in person_filenames]
    # persons_dict = [{"file_name": "sachin_dataset/" + filename, "text": caption}
    #                 for filename, caption in zip(person_filenames, person_captions)]
    # image_captions_dict.extend(persons_dict)

    image_captions_dict.extend(caption_images(person_paths, processor, model, "tom_cruise_dataset/"))
    image_captions_dict.extend(caption_images(image_paths, processor, model, "images/"))

    # with open(f"{path}metadata.jsonl", 'w') as fp:
    #     json.dump(image_captions_dict, fp)

    # Write the captions next to the images and keep a local copy
    metadata = pd.DataFrame(image_captions_dict)
    metadata.to_csv(f"{path}metadata.csv", index=False)
    metadata.to_csv("metadata.csv", index=False)

if __name__ == "__main__":
    images_dir = "/l/vision/v5/sragas/easel_ai/thumbs_up_dataset/"
    create_thumbs_up_person_dataset(images_dir)
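
# The emitted metadata.csv follows the Hugging Face `imagefolder` convention
# (a `file_name` column plus extra feature columns), so the captioned dataset
# can be loaded for fine-tuning roughly like this -- a minimal sketch, assuming
# the `datasets` library is installed and `data_dir` points at the folder that
# contains metadata.csv:
#
#   from datasets import load_dataset
#   dataset = load_dataset("imagefolder", data_dir=images_dir, split="train")
#   print(dataset[0]["text"])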