# Gradio app: upload an image -> Florence-2 captions it -> caption is wrapped in a
# cartoon-style prompt -> SD3.5-TurboX renders a new image from that prompt.

import sys
import types
import importlib.machinery
import random

# --- flash_attn stub -------------------------------------------------------
# Register a dummy 'flash_attn' module so that trust_remote_code model code
# which does `import flash_attn` succeeds without the real package installed.
spec = importlib.machinery.ModuleSpec('flash_attn', loader=None)
mod = types.ModuleType('flash_attn')
mod.__spec__ = spec
sys.modules['flash_attn'] = mod

import transformers.utils.import_utils as _import_utils
from transformers.utils import is_flash_attn_2_available

# BUG FIX: the original code replaced _is_package_available with
# `lambda pkg: False`, which told transformers that *every* optional package
# (torch, PIL, accelerate, ...) is missing — breaking unrelated feature
# detection. Only 'flash_attn' should be reported as unavailable; everything
# else must fall through to the real implementation (which also takes an
# optional `return_version` argument, hence *args/**kwargs).
_orig_is_package_available = _import_utils._is_package_available
_import_utils._is_package_available = (
    lambda pkg, *args, **kwargs: False
    if pkg == "flash_attn"
    else _orig_is_package_available(pkg, *args, **kwargs)
)
_import_utils.is_flash_attn_2_available = lambda: False

import huggingface_hub as _hf_hub
# Compatibility shim: some dependency versions still call the removed
# huggingface_hub.cached_download helper; alias it to hf_hub_download.
_hf_hub.cached_download = _hf_hub.hf_hub_download

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler

# Pinned Florence-2 revision so remote code cannot change underneath us.
REVISION = "ceaf371f01ef66192264811b390bccad475a4f02"

# --- Load Florence-2 (image captioning) ------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
florence_model = AutoModelForCausalLM.from_pretrained(
    'microsoft/Florence-2-base',
    revision=REVISION,
    trust_remote_code=True,
).to(device).eval()
florence_processor = AutoProcessor.from_pretrained(
    'microsoft/Florence-2-base',
    revision=REVISION,
    trust_remote_code=True,
)

# --- Load Stable Diffusion 3.5 TurboX (text-to-image) ----------------------
model_repo = "tensorart/stable-diffusion-3.5-large-TurboX"
pipe = DiffusionPipeline.from_pretrained(
    model_repo,
    # fp16 only makes sense on GPU; CPU inference needs fp32.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    model_repo, subfolder="scheduler", shift=5
)
pipe = pipe.to(device)

MAX_SEED = 2**31 - 1


def pseudo_translate_to_korean_style(en_prompt: str) -> str:
    """Wrap an English caption in the fixed cartoon-style prompt template.

    Despite the name, no translation happens — the caption is kept in
    English and only decorated with style keywords.
    """
    return f"Cartoon styled {en_prompt} handsome or pretty people"


def generate_prompt(image):
    """Image -> English caption (Florence-2) -> cartoon-style prompt.

    Accepts a PIL image or a numpy array (Gradio may pass either).
    Returns the style-wrapped prompt string.
    """
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    inputs = florence_processor(text="", images=image, return_tensors="pt").to(device)
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=512,
        num_beams=3,
    )
    # Special tokens are kept because Florence-2's post-processor parses them.
    generated_text = florence_processor.batch_decode(
        generated_ids, skip_special_tokens=False
    )[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task="",
        image_size=(image.width, image.height),
    )
    prompt_en = parsed_answer[""]

    # Apply the style template (no actual translation involved).
    return pseudo_translate_to_korean_style(prompt_en)


def generate_image(prompt, seed=42, randomize_seed=False):
    """Text prompt -> generated image.

    Returns (PIL image, seed actually used). When randomize_seed is True the
    given seed is replaced by a random one in [0, MAX_SEED].
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=prompt,
        negative_prompt="왜곡된 손, 흐림, 이상한 얼굴",
        guidance_scale=1.5,
        num_inference_steps=8,
        width=768,
        height=768,
        generator=generator,
    ).images[0]
    return image, seed


# --- Gradio UI --------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🖼 이미지 → 설명 생성 → 카툰 이미지 자동 생성기")
    gr.Markdown(
        "**📌 사용법 안내 (한국어)**\n"
        "- 왼쪽에 이미지를 업로드하세요.\n"
        "- AI가 영어 설명을 만들고, 내부에서 한국어 스타일 프롬프트로 재구성합니다.\n"
        "- 오른쪽에 결과 이미지가 생성됩니다."
    )

    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="🎨 원본 이미지 업로드")
            run_button = gr.Button("✨ 생성 시작")
        with gr.Column():
            prompt_out = gr.Textbox(label="📝 스타일 적용된 프롬프트", lines=3, show_copy_button=True)
            output_img = gr.Image(label="🎉 생성된 이미지")

    def full_process(img):
        # Caption the upload, then render a cartoon image from the prompt.
        prompt = generate_prompt(img)
        image, _ = generate_image(prompt, randomize_seed=True)
        return prompt, image

    run_button.click(fn=full_process, inputs=[input_img], outputs=[prompt_out, output_img])

demo.launch()