FLUX-Prompt-Generator123

Runtime error

File size: 16,081 Bytes

# 필요한 라이브러리 임포트
import gradio as gr
import random
import json
import os
import re
from datetime import datetime
from huggingface_hub import InferenceClient
import subprocess
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
import random
import openai  # OpenAI API 라이브러리 추가

subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

# OpenAI API 클라이언트 설정
openai.api_key = os.getenv("OPENAI_API_KEY")

# Initialize Florence model
device = "cuda" if torch.cuda.is_available() else "cpu"
florence_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
florence_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)

# Florence caption function
def florence_caption(image):
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    
    inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task="<MORE_DETAILED_CAPTION>",
        image_size=(image.width, image.height)
    )
    return parsed_answer["<MORE_DETAILED_CAPTION>"]

# JSON 파일 로드 함수
def load_json_file(file_name):
    file_path = os.path.join("data", file_name)
    with open(file_path, "r") as file:
        return json.load(file)

ARTFORM = load_json_file("artform.json")
PHOTO_TYPE = load_json_file("photo_type.json")
BODY_TYPES = load_json_file("body_types.json")
DEFAULT_TAGS = load_json_file("default_tags.json")
ROLES = load_json_file("roles.json")
HAIRSTYLES = load_json_file("hairstyles.json")
ADDITIONAL_DETAILS = load_json_file("additional_details.json")
PHOTOGRAPHY_STYLES = load_json_file("photography_styles.json")
DEVICE = load_json_file("device.json")
PHOTOGRAPHER = load_json_file("photographer.json")
ARTIST = load_json_file("artist.json")
DIGITAL_ARTFORM = load_json_file("digital_artform.json")
PLACE = load_json_file("place.json")
LIGHTING = load_json_file("lighting.json")
CLOTHING = load_json_file("clothing.json")
COMPOSITION = load_json_file("composition.json")
POSE = load_json_file("pose.json")
BACKGROUND = load_json_file("background.json")

# PromptGenerator 클래스 정의
class PromptGenerator:
    def __init__(self, seed=None):
        self.rng = random.Random(seed)

    def split_and_choose(self, input_str):
        choices = [choice.strip() for choice in input_str.split(",")]
        return self.rng.choices(choices, k=1)[0]

    def get_choice(self, input_str, default_choices):
        if input_str.lower() == "disabled":
            return ""
        elif "," in input_str:
            return self.split_and_choose(input_str)
        elif input_str.lower() == "random":
            return self.rng.choices(default_choices, k=1)[0]
        else:
            return input_str

    def clean_consecutive_commas(self, input_string):
        cleaned_string = re.sub(r',\s*,', ',', input_string)
        return cleaned_string

    def process_string(self, replaced, seed):
        replaced = re.sub(r'\s*,\s*', ',', replaced)
        replaced = re.sub(r',+', ',', replaced)
        original = replaced
        
        first_break_clipl_index = replaced.find("BREAK_CLIPL")
        second_break_clipl_index = replaced.find("BREAK_CLIPL", first_break_clipl_index + len("BREAK_CLIPL"))
        
        if first_break_clipl_index != -1 and second_break_clipl_index != -1:
            clip_content_l = replaced[first_break_clipl_index + len("BREAK_CLIPL"):second_break_clipl_index]
            replaced = replaced[:first_break_clipl_index].strip(", ") + replaced[second_break_clipl_index + len("BREAK_CLIPL"):].strip(", ")
            clip_l = clip_content_l
        else:
            clip_l = ""
        
        first_break_clipg_index = replaced.find("BREAK_CLIPG")
        second_break_clipg_index = replaced.find("BREAK_CLIPG", first_break_clipg_index + len("BREAK_CLIPG"))
        
        if first_break_clipg_index != -1 and second_break_clipg_index != -1:
            clip_content_g = replaced[first_break_clipg_index + len("BREAK_CLIPG"):second_break_clipg_index]
            replaced = replaced[:first_break_clipg_index].strip(", ") + replaced[second_break_clipg_index + len("BREAK_CLIPG"):].strip(", ")
            clip_g = clip_content_g
        else:
            clip_g = ""
        
        t5xxl = replaced
        
        original = original.replace("BREAK_CLIPL", "").replace("BREAK_CLIPG", "")
        original = re.sub(r'\s*,\s*', ',', original)
        original = re.sub(r',+', ',', original)
        clip_l = re.sub(r'\s*,\s*', ',', clip_l)
        clip_l = re.sub(r',+', ',', clip_l)
        clip_g = re.sub(r'\s*,\s*', ',', clip_g)
        clip_g = re.sub(r',+', ',', clip_g)
        if clip_l.startswith(","):
            clip_l = clip_l[1:]
        if clip_g.startswith(","):
            clip_g = clip_g[1:]
        if original.startswith(","):
            original = original[1:]
        if t5xxl.startswith(","):
            t5xxl = t5xxl[1:]

        return original, seed, t5xxl, clip_l, clip_g

    def generate_prompt(self, seed, custom, subject, artform, photo_type, body_types, default_tags, roles, hairstyles,
                        additional_details, photography_styles, device, photographer, artist, digital_artform,
                        place, lighting, clothing, composition, pose, background, input_image):
        # 생략된 기능들...
        pass
    
    def add_caption_to_prompt(self, prompt, caption):
        if caption:
            return f"{prompt}, {caption}"
        return prompt

# HuggingFace 모델을 사용한 텍스트 생성 클래스 정의
class HuggingFaceInferenceNode:
    def __init__(self):
        self.clients = {
            "Mixtral": InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"),
            "Mistral": InferenceClient("mistralai/Mistral-7B-Instruct-v0.3"),
            "Llama 3": InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct"),
            "Mistral-Nemo": InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
        }
        self.prompts_dir = "./prompts"
        os.makedirs(self.prompts_dir, exist_ok=True)

    def save_prompt(self, prompt):
        filename_text = "hf_" + prompt.split(',')[0].strip()
        filename_text = re.sub(r'[^\w\-_\. ]', '_', filename_text)
        filename_text = filename_text[:30]  
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_filename = f"{filename_text}_{timestamp}.txt"
        filename = os.path.join(self.prompts_dir, base_filename)
        
        with open(filename, "w") as file:
            file.write(prompt)
        
        print(f"Prompt saved to {filename}")

    def generate(self, model, input_text, happy_talk, compress, compression_level, poster, custom_base_prompt=""):
        # 생략된 기능들...
        pass

# gpt-4o-mini와 Cohere Command R+를 사용한 프롬프트 생성 함수
def call_gpt4o_mini(content, system_message, max_tokens=1000, temperature=0.7, top_p=1):
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": content},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    return response.choices[0].message['content']

def call_cohere(content, temperature=0.7, max_tokens=1000):
    response = openai.ChatCompletion.create(
        model="Cohere-Command-R+",
        messages=[
            {"role": "user", "content": content},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message['content']


# Gradio 인터페이스 생성 함수
def create_interface():
    prompt_generator = PromptGenerator()  # PromptGenerator 클래스가 정의되었으므로 사용 가능
    huggingface_node = HuggingFaceInferenceNode()

    with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
        
        gr.HTML("""<h1 align="center">FLUX 프롬프트 생성기</h1>
                   <p><center>이미지 또는 간단한 텍스트에서 긴 프롬프트를 생성합니다. 짧은 프롬프트를 개선합니다.</center></p>""")

        with gr.Row():
            with gr.Column(scale=2):
                with gr.Accordion("기본 설정"):
                    seed = gr.Number(label="시드", value=random.randint(0, 1000000))
                    custom = gr.Textbox(label="사용자 정의 입력 프롬프트 (선택사항)")
                    subject = gr.Textbox(label="주제 (선택사항)")
                    global_option = gr.Radio(["비활성화", "랜덤"], label="모든 옵션 설정:", value="비활성화")
                
                with gr.Accordion("예술 형식 및 사진 유형", open=False):
                    artform = gr.Dropdown(["비활성화", "랜덤"] + ARTFORM, label="예술 형식", value="비활성화")
                    photo_type = gr.Dropdown(["비활성화", "랜덤"] + PHOTO_TYPE, label="사진 유형", value="비활성화")
            
                with gr.Accordion("캐릭터 세부사항", open=False):
                    body_types = gr.Dropdown(["비활성화", "랜덤"] + BODY_TYPES, label="체형", value="비활성화")
                    default_tags = gr.Dropdown(["비활성화", "랜덤"] + DEFAULT_TAGS, label="기본 태그", value="비활성화")
                    roles = gr.Dropdown(["비활성화", "랜덤"] + ROLES, label="역할", value="비활성화")
                    hairstyles = gr.Dropdown(["비활성화", "랜덤"] + HAIRSTYLES, label="헤어스타일", value="비활성화")
                    clothing = gr.Dropdown(["비활성화", "랜덤"] + CLOTHING, label="의상", value="비활성화")
            
                with gr.Accordion("장면 세부사항", open=False):
                    place = gr.Dropdown(["비활성화", "랜덤"] + PLACE, label="장소", value="비활성화")
                    lighting = gr.Dropdown(["비활성화", "랜덤"] + LIGHTING, label="조명", value="비활성화")
                    composition = gr.Dropdown(["비활성화", "랜덤"] + COMPOSITION, label="구성", value="비활성화")
                    pose = gr.Dropdown(["비활성화", "랜덤"] + POSE, label="포즈", value="비활성화")
                    background = gr.Dropdown(["비활성화", "랜덤"] + BACKGROUND, label="배경", value="비활성화")
            
                with gr.Accordion("스타일 및 아티스트", open=False):
                    additional_details = gr.Dropdown(["비활성화", "랜덤"] + ADDITIONAL_DETAILS, label="추가 세부 사항", value="비활성화")
                    photography_styles = gr.Dropdown(["비활성화", "랜덤"] + PHOTOGRAPHY_STYLES, label="사진 스타일", value="비활성화")
                    device = gr.Dropdown(["비활성화", "랜덤"] + DEVICE, label="장비", value="비활성화")
                    photographer = gr.Dropdown(["비활성화", "랜덤"] + PHOTOGRAPHER, label="사진작가", value="비활성화")
                    artist = gr.Dropdown(["비활성화", "랜덤"] + ARTIST, label="아티스트", value="비활성화")
                    digital_artform = gr.Dropdown(["비활성화", "랜덤"] + DIGITAL_ARTFORM, label="디지털 예술 형식", value="비활성화")
                
                generate_button = gr.Button("프롬프트 생성")

            with gr.Column(scale=2):
                with gr.Accordion("이미지 및 설명", open=False):
                    input_image = gr.Image(label="입력 이미지 (선택사항)")
                    caption_output = gr.Textbox(label="생성된 설명", lines=3)
                    create_caption_button = gr.Button("설명 생성")
                    add_caption_button = gr.Button("프롬프트에 설명 추가")

                with gr.Accordion("프롬프트 생성", open=True):
                    output = gr.Textbox(label="생성된 프롬프트 / 입력 텍스트", lines=4)
                    t5xxl_output = gr.Textbox(label="T5XXL 출력", visible=True)
                    clip_l_output = gr.Textbox(label="CLIP L 출력", visible=True)
                    clip_g_output = gr.Textbox(label="CLIP G 출력", visible=True)
            
            with gr.Column(scale=2):
                with gr.Accordion("LLM을 사용한 프롬프트 생성", open=False):
                    model = gr.Dropdown(["Mixtral", "Mistral", "Llama 3", "Mistral-Nemo", "gpt-4o-mini", "Cohere-Command-R+"], label="모델", value="Llama 3")
                    happy_talk = gr.Checkbox(label="행복한 대화", value=True)
                    compress = gr.Checkbox(label="압축", value=True)
                    compression_level = gr.Radio(["부드럽게", "중간", "강하게"], label="압축 레벨", value="강하게")
                    poster = gr.Checkbox(label="포스터 형식", value=False)
                    custom_base_prompt = gr.Textbox(label="사용자 정의 기본 프롬프트", lines=5)
                generate_text_button = gr.Button("LLM으로 프롬프트 생성")
                text_output = gr.Textbox(label="생성된 텍스트", lines=10)

        def create_caption(image):
            if image is not None:
                return florence_caption(image)
            return ""

        create_caption_button.click(
            create_caption,
            inputs=[input_image],
            outputs=[caption_output]
        )

        generate_button.click(
            prompt_generator.generate_prompt,
            inputs=[seed, custom, subject, artform, photo_type, body_types, default_tags, roles, hairstyles,
                    additional_details, photography_styles, device, photographer, artist, digital_artform,
                    place, lighting, clothing, composition, pose, background],
            outputs=[output, gr.Number(visible=False), t5xxl_output, clip_l_output, clip_g_output]
        )

        add_caption_button.click(
            prompt_generator.add_caption_to_prompt,
            inputs=[output, caption_output],
            outputs=[output]
        )

        generate_text_button.click(
            lambda model, input_text, happy_talk, compress, compression_level, poster, custom_base_prompt: call_gpt4o_mini(input_text, custom_base_prompt) if model == "gpt-4o-mini" else call_cohere(input_text),
            inputs=[model, output, happy_talk, compress, compression_level, poster, custom_base_prompt],
            outputs=text_output
        )

        def update_all_options(choice):
            return {dropdown: gr.update(value=choice) for dropdown in [
                artform, photo_type, body_types, default_tags, roles, hairstyles, clothing,
                place, lighting, composition, pose, background, additional_details,
                photography_styles, device, photographer, artist, digital_artform
            ]}

        global_option.change(
            update_all_options,
            inputs=[global_option],
            outputs=[
                artform, photo_type, body_types, default_tags, roles, hairstyles, clothing,
                place, lighting, composition, pose, background, additional_details,
                photography_styles, device, photographer, artist, digital_artform
            ]
        )

    return demo

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()