---
license: apache-2.0
language:
- en
library_name: transformers
pipeline_tag: image-text-to-text
tags:
- art
---

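Install the extra dependencies needed by the Florence-2 remote code (`transformers`, `torch`, `Pillow`, and `requests` are also required for the example below):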
```
pip install -q datasets flash_attn timm einops
```

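Example usage, generating a detailed caption with the `<DESCRIPTION>` task prompt: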
```python
import requests
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(
    "gokaygokay/Florence-2-Flux-Large", trust_remote_code=True
).to(device).eval()
processor = AutoProcessor.from_pretrained(
    "gokaygokay/Florence-2-Flux-Large", trust_remote_code=True
)

def run_example(task_prompt, text_input, image):
    """Run the model on a single image and return the parsed answer."""
    prompt = task_prompt + text_input

    # Florence-2 expects RGB input
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        repetition_penalty=1.10,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text, task=task_prompt, image_size=(image.width, image.height)
    )
    return parsed_answer

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
image = Image.open(requests.get(url, stream=True).raw)
answer = run_example("<DESCRIPTION>", "Describe this image in great detail.", image)

print(answer["<DESCRIPTION>"])
```
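
Optionally, on a CUDA GPU the model can be loaded in half precision to reduce memory use. This is a minimal sketch under the assumption that fp16 inference is acceptable for this checkpoint:

```python
import torch
from transformers import AutoModelForCausalLM, AutoProcessor

device = torch.device("cuda")

# Load the weights in fp16 (assumption: fp16 inference works well for this checkpoint).
model = AutoModelForCausalLM.from_pretrained(
    "gokaygokay/Florence-2-Flux-Large",
    torch_dtype=torch.float16,
    trust_remote_code=True,
).to(device).eval()
processor = AutoProcessor.from_pretrained("gokaygokay/Florence-2-Flux-Large", trust_remote_code=True)

# The floating-point inputs (pixel_values) must then match the model dtype, e.g.:
# inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch.float16)
```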