File size: 1,803 Bytes

0c9de77
db345b8
 
 
0c9de77
db345b8
 
 
4989ab6
032cd65
 
0c9de77
 
db345b8
 
 
0c9de77
db345b8
0c9de77
db345b8
 
0c9de77
db345b8
0c9de77
db345b8
 
0c9de77
db345b8
 
 
0c9de77
db345b8
 
 
0c9de77
db345b8
 
 
 
 
d114049
 
db345b8
 
 
 
0c9de77
db345b8
 
 
0c9de77
db345b8
 
 
0c9de77
db345b8
 
ed3af3d
db345b8

---
license: apache-2.0
language:
- en
library_name: transformers
pipeline_tag: image-text-to-text
tags:
- art
base_model: microsoft/Florence-2-large
datasets:
- kadirnar/fluxdev_controlnet_16k
---

```
pip install -q datasets flash_attn timm einops
```

```python

from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
import torch

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both the weights and the processor come from the same Hub repository.
# NOTE: trust_remote_code=True allows transformers to execute the custom
# modelling/processing code shipped in that repository.
model_id = "gokaygokay/Florence-2-Flux-Large"

model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
model = model.to(device)
model = model.eval()  # inference mode: disables dropout and similar layers
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Function to run the model on an example
def run_example(task_prompt, text_input, image):
    """Generate and post-process the model's answer for a single image.

    Uses the module-level ``processor``, ``model`` and ``device``.

    Args:
        task_prompt: Task token placed at the start of the prompt (the
            example below passes ``"<DESCRIPTION>"``); it is also handed to
            the processor's post-processing step as ``task``.
        text_input: Text appended directly after ``task_prompt``.
        image: Image object exposing ``mode``, ``convert``, ``width`` and
            ``height`` (the example below passes a PIL image).

    Returns:
        Whatever ``processor.post_process_generation`` returns; the example
        below indexes it with the task prompt.
    """
    full_prompt = task_prompt + text_input

    # Anything that is not already RGB is converted before preprocessing.
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")

    batch = processor(text=full_prompt, images=rgb_image, return_tensors="pt")
    batch = batch.to(device)

    output_ids = model.generate(
        input_ids=batch["input_ids"],
        pixel_values=batch["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        repetition_penalty=1.10,
    )

    # Only the first (and only) sequence of the batch is decoded; special
    # tokens are deliberately left in the text passed to post-processing.
    decoded = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
    return processor.post_process_generation(
        decoded,
        task=task_prompt,
        image_size=(rgb_image.width, rgb_image.height),
    )

from PIL import Image
import requests
import copy

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"

# Bound the request and check the HTTP status up front: without a timeout a
# stalled connection blocks forever, and without the status check an error
# response would only fail later inside PIL with a misleading decode error.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

answer = run_example("<DESCRIPTION>", "Describe this image in great detail.", image)

# The parsed result is keyed by the task prompt that was used.
final_answer = answer["<DESCRIPTION>"]
print(final_answer)
   
```