image-to-prompt

Running

File size: 2,041 Bytes

40a0c27
fe109d0
4ca9103
87e9ce2
 
 
643466f
 
56c9cf4
87e9ce2
 
 
 
56c9cf4
87e9ce2
 
 
157d6c4
58a7b11
87e9ce2
 
 
 
 
 
 
 
 
 
 
56c9cf4
87e9ce2
 
157d6c4
 
 
 
 
fa8e3c4
411ddb3
56c9cf4
 
 
029098a
87e9ce2

import os
import subprocess
import sys

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# Best-effort install of flash-attn at startup; the return code is deliberately
# not checked, so a failed install does not stop the app from starting.
# The child environment EXTENDS the current one: passing only the single
# variable as `env` would replace the whole environment (PATH, HOME, any
# virtualenv settings). `sys.executable -m pip` targets the interpreter that is
# running this script rather than whichever `pip` the shell resolves.
# NOTE(review): FLASH_ATTENTION_SKIP_CUDA_BUILD presumably tells the flash-attn
# build to skip compiling CUDA kernels — confirm against the package docs.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
)

# Initialise the Florence model and its processor.
# Both are loaded from the same checkpoint, so the id is named once.
FLORENCE_CHECKPOINT = 'microsoft/Florence-2-base'

# Run on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# trust_remote_code=True lets transformers run the code shipped with the checkpoint.
florence_model = AutoModelForCausalLM.from_pretrained(
    FLORENCE_CHECKPOINT,
    trust_remote_code=True,
)
florence_model = florence_model.to(device).eval()
florence_processor = AutoProcessor.from_pretrained(
    FLORENCE_CHECKPOINT,
    trust_remote_code=True,
)

def generate_ad_post(image):
    """Generate a text description of ``image`` with the Florence model.

    Args:
        image: Either a ``PIL.Image.Image`` or an array-like object accepted by
            ``Image.fromarray`` (presumably the NumPy array that ``gr.Image``
            hands to the callback — confirm against the Gradio version in use).

    Returns:
        The text stored under the task label in the processor's parsed answer,
        or the raw decoded text (special tokens included) when that key is
        missing.

    Raises:
        gr.Error: If ``image`` is ``None``, i.e. no image was supplied.
    """
    # Without this guard a missing image reaches Image.fromarray(None) and
    # fails with an opaque error; gr.Error reports a readable message instead.
    if image is None:
        raise gr.Error("Пожалуйста, загрузите изображение.")

    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # The label is passed to post-processing and is also the key under which
    # the parsed text is looked up, so it is bound once to keep both in sync.
    task = "Создание рекламного поста"

    # A more specific prompt text for generating the ad post.
    inputs = florence_processor(
        text="describe absolutely all the details are in this image.  ",
        images=image,
        return_tensors="pt",
    ).to(device)

    # Deterministic beam search (no sampling) with three beams.
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )

    # Special tokens are kept in the decoded text that is handed to the
    # processor's post-processing step.
    generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task=task,
        image_size=(image.width, image.height),
    )

    # Extract the text from the answer, falling back to the raw decoded text.
    prompt = parsed_answer.get(task, generated_text)

    print("\n\nГенерация завершена!:" + prompt)
    return prompt

# Build the web UI: one image input wired to generate_ad_post, one text output.
input_components = [gr.Image(label="Входное изображение")]
output_components = [
    gr.Textbox(label="Рекламный пост", lines=2, show_copy_button=True),
]

io = gr.Interface(
    fn=generate_ad_post,
    inputs=input_components,
    outputs=output_components,
)

# Start the Gradio server with debug=True.
io.launch(debug=True)