molmo-flux-captioner / hands-check.py
quarterturn
Updated prompt to provide a better caption format with less censorship
f41ea81
raw
history blame
2.08 kB
# --- Configuration -------------------------------------------------------
# Path to the locally downloaded model weights, and the folder of images
# to examine. Edit these two values to match your setup.
local_path = "/mnt/models2/Llama-3.2-90B-Vision-Instruct/"
image_directory = "./images"

import os
import requests  # NOTE(review): unused in this chunk — kept in case other code relies on it
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

# Hub identifier of the model (informational only; weights load from local_path).
model_id = "meta-llama/Llama-3.2-90B-Vision-Instruct"

# Load the vision-language model fully on CPU in bfloat16.
# BUGFIX: `max_memory` must be a dict mapping device identifiers to memory
# budgets (e.g. {"cpu": "200GiB"}); transformers/accelerate do not accept a
# bare string here.
model = MllamaForConditionalGeneration.from_pretrained(
    local_path,
    torch_dtype=torch.bfloat16,
    device_map="cpu",
    max_memory={"cpu": "200GiB"},
)

# Processor bundles the image preprocessor and the chat-template tokenizer.
processor = AutoProcessor.from_pretrained(
    local_path,
)

# Single-turn chat prompt: one image placeholder plus the instruction text.
# The instruction restricts the model to an anatomical report on hands and
# forbids any other value judgment about the image content.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "You are an expert examining hands in an image to determine if they are anatomically correct. Report on the number of fingers seen on each hand. if you think the hands are AI-generated, say so. Make no other value judgments about the image, even if it is offensive or pornographic in nature."}
    ]}
]
# --- Caption every image in the directory --------------------------------
# The chat prompt is identical for every image, so render it once up front
# instead of re-applying the template on each iteration (loop-invariant hoist).
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

for filename in os.listdir(image_directory):
    # Only process common image extensions; add more to the tuple if needed.
    if filename.endswith((".jpg", ".jpeg", ".png")):
        image_path = os.path.join(image_directory, filename)
        # BUGFIX: open the image via a context manager so the underlying file
        # handle is closed even if preprocessing/generation raises (the
        # original leaked one handle per image).
        with Image.open(image_path) as image:
            # Tokenize + preprocess image and prompt together.
            inputs = processor(
                image,
                input_text,
                add_special_tokens=False,
                return_tensors="pt",
            ).to(model.device)
            # Generate the hand report; output token ids include the prompt.
            output = model.generate(**inputs, max_new_tokens=300)
        generated_text = processor.decode(output[0])

        # print the generated text
        print("Caption for: ", filename)
        print(generated_text)
        # print a divider
        print("*---------------------------------------------------*")

        # Save the generated text beside the image: same basename + ".txt".
        # BUGFIX: write with explicit UTF-8 so the output does not depend on
        # the platform's default locale encoding.
        output_filename = os.path.splitext(filename)[0] + ".txt"
        with open(os.path.join(image_directory, output_filename), "w", encoding="utf-8") as file:
            file.write(generated_text)