pip install -q torch==2.4.0 datasets flash_attn timm einops

from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained("gokaygokay/Florence-2-Flux", trust_remote_code=True).to(device).eval()
processor = AutoProcessor.from_pretrained("gokaygokay/Florence-2-Flux", trust_remote_code=True)

# Function to run the model on an example
def run_example(task_prompt, text_input, image):
    prompt = task_prompt + text_input

    # Ensure the image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        repetition_penalty=1.10,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
    return parsed_answer

from PIL import Image
import requests
import copy

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
image = Image.open(requests.get(url, stream=True).raw)
answer = run_example("<DESCRIPTION>", "Describe this image in great detail.", image)

final_answer = answer["<DESCRIPTION>"]
print(final_answer)
  
Downloads last month
699
Safetensors
Model size
271M params
Tensor type
F32
Β·
Inference API
Inference API (serverless) does not yet support model repos that contain custom code.

Model tree for gokaygokay/Florence-2-Flux

Finetuned
(5)
this model

Dataset used to train gokaygokay/Florence-2-Flux

Spaces using gokaygokay/Florence-2-Flux 7