File size: 1,803 Bytes
0c9de77 db345b8 0c9de77 db345b8 4989ab6 032cd65 0c9de77 db345b8 0c9de77 db345b8 0c9de77 db345b8 0c9de77 db345b8 0c9de77 db345b8 0c9de77 db345b8 0c9de77 db345b8 0c9de77 db345b8 d114049 db345b8 0c9de77 db345b8 0c9de77 db345b8 0c9de77 db345b8 ed3af3d db345b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
---
license: apache-2.0
language:
- en
library_name: transformers
pipeline_tag: image-text-to-text
tags:
- art
base_model: microsoft/Florence-2-large
datasets:
- kadirnar/fluxdev_controlnet_16k
---
```
pip install -q datasets flash_attn timm einops
```
```python
from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained("gokaygokay/Florence-2-Flux-Large", trust_remote_code=True).to(device).eval()
processor = AutoProcessor.from_pretrained("gokaygokay/Florence-2-Flux-Large", trust_remote_code=True)
# Function to run the model on an example
def run_example(task_prompt, text_input, image):
prompt = task_prompt + text_input
# Ensure the image is in RGB mode
if image.mode != "RGB":
image = image.convert("RGB")
inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
generated_ids = model.generate(
input_ids=inputs["input_ids"],
pixel_values=inputs["pixel_values"],
max_new_tokens=1024,
num_beams=3,
repetition_penalty=1.10,
)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
return parsed_answer
from PIL import Image
import requests
import copy
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
image = Image.open(requests.get(url, stream=True).raw)
answer = run_example("<DESCRIPTION>", "Describe this image in great detail.", image)
final_answer = answer["<DESCRIPTION>"]
print(final_answer)
``` |