🎨 Webtoon Studio
Generate webtoon-style images and add text with various styles and positions.
The app is a single Gradio script. The first part covers setup: memory cleanup, CUDA configuration, Hugging Face authentication, a CPU-side Korean-to-English translator, and the FLUX.1-dev pipeline with the fantasy LoRA fused in:

```python
import os
import gc
import uuid
import random
import tempfile
import time
from datetime import datetime
from typing import Any

from huggingface_hub import login, hf_hub_download
import spaces
import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from diffusers import FluxPipeline
from transformers import pipeline


# Memory cleanup helper
def clear_memory():
    gc.collect()
    try:
        if torch.cuda.is_available():
            with torch.cuda.device(0):
                torch.cuda.empty_cache()
    except Exception:
        pass


# GPU setup
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

if torch.cuda.is_available():
    try:
        with torch.cuda.device(0):
            torch.cuda.empty_cache()
            torch.backends.cudnn.benchmark = True
            torch.backends.cuda.matmul.allow_tf32 = True
    except Exception:
        print("Warning: Could not configure CUDA settings")

# HF token setup
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("Please set the HF_TOKEN environment variable")

try:
    login(token=HF_TOKEN)
except Exception as e:
    raise ValueError(f"Failed to login to Hugging Face: {str(e)}")

# Korean-to-English translator, kept on the CPU
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en", device=-1)


def translate_to_english(text: str) -> str:
    """Translate Korean text to English; return other text unchanged."""
    try:
        if any(ord('가') <= ord(char) <= ord('힣') for char in text):
            translated = translator(text, max_length=128)[0]['translation_text']
            print(f"Translated '{text}' to '{translated}'")
            return translated
        return text
    except Exception as e:
        print(f"Translation error: {str(e)}")
        return text


# Initialize the FLUX pipeline
print("Initializing FLUX pipeline...")
try:
    pipe = FluxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev",
        torch_dtype=torch.float16,
        token=HF_TOKEN
    )
    print("FLUX pipeline initialized successfully")

    # Memory optimization settings
    pipe.enable_attention_slicing(slice_size=1)

    # GPU settings
    if torch.cuda.is_available():
        pipe = pipe.to("cuda:0")
        torch.cuda.empty_cache()
        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True

    print("Pipeline optimization settings applied")
except Exception as e:
    print(f"Error initializing FLUX pipeline: {str(e)}")
    raise

# Load the LoRA weights
print("Loading LoRA weights...")
try:
    # Resolve the absolute path of the local LoRA file
    current_dir = os.path.dirname(os.path.abspath(__file__))
    lora_path = os.path.join(current_dir, "myt-flux-fantasy.safetensors")

    if not os.path.exists(lora_path):
        raise FileNotFoundError(f"LoRA file not found at: {lora_path}")

    print(f"Loading LoRA weights from: {lora_path}")

    # Load and fuse the LoRA weights
    pipe.load_lora_weights(lora_path)
    pipe.fuse_lora(lora_scale=0.75)  # adjusted lora_scale

    # Free memory
    torch.cuda.empty_cache()
    gc.collect()

    print("LoRA weights loaded and fused successfully")
    print(f"Current device: {pipe.device}")
except Exception as e:
    print(f"Error loading LoRA weights: {str(e)}")
    print(f"Full error details: {repr(e)}")
    raise ValueError(f"Failed to load LoRA weights: {str(e)}")
```
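The script imports `hf_hub_download` but only ever looks for `myt-flux-fantasy.safetensors` next to the script itself. If the LoRA file were hosted on the Hub instead, a minimal sketch of fetching it before loading might look like the following; the repo id is a hypothetical placeholder, not something named in the original code:

```python
from huggingface_hub import hf_hub_download

# Hypothetical repo id: substitute wherever the LoRA file is actually hosted.
lora_path = hf_hub_download(
    repo_id="your-username/myt-flux-fantasy",
    filename="myt-flux-fantasy.safetensors",
    token=HF_TOKEN,
)
pipe.load_lora_weights(lora_path)
pipe.fuse_lora(lora_scale=0.75)
```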
Generation and the text-overlay helpers come next. `generate_image` translates the prompt, runs the pipeline under autocast, and saves each result; `add_text_to_image` composites the caption onto a transparent overlay, using repeated offset draws as a stroke effect:

```python
@spaces.GPU(duration=60)
def generate_image(
    prompt: str,
    seed: int,
    randomize_seed: bool,
    width: int,
    height: int,
    guidance_scale: float,
    num_inference_steps: int,
    progress: gr.Progress = gr.Progress()
):
    try:
        clear_memory()

        translated_prompt = translate_to_english(prompt)
        print(f"Processing prompt: {translated_prompt}")

        if randomize_seed:
            seed = random.randint(0, MAX_SEED)

        generator = torch.Generator(device=device).manual_seed(seed)

        print(f"Current device: {pipe.device}")
        print("Starting image generation...")

        with torch.inference_mode(), torch.cuda.amp.autocast(enabled=True):
            image = pipe(
                prompt=translated_prompt,
                width=width,
                height=height,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator,
                num_images_per_prompt=1,
            ).images[0]

        filepath = save_generated_image(image, translated_prompt)
        print(f"Image generated and saved to: {filepath}")

        return image, seed
    except Exception as e:
        print(f"Generation error: {str(e)}")
        print(f"Full error details: {repr(e)}")
        raise gr.Error(f"Image generation failed: {str(e)}")
    finally:
        clear_memory()


# Output directory setup
SAVE_DIR = "saved_images"
if not os.path.exists(SAVE_DIR):
    os.makedirs(SAVE_DIR, exist_ok=True)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024


def save_generated_image(image, prompt):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    unique_id = str(uuid.uuid4())[:8]
    filename = f"{timestamp}_{unique_id}.png"
    filepath = os.path.join(SAVE_DIR, filename)
    image.save(filepath)
    return filepath


def add_text_with_stroke(draw, text, x, y, font, text_color, stroke_width):
    """Draw the text repeatedly with small offsets to create an outline effect."""
    for adj_x in range(-stroke_width, stroke_width + 1):
        for adj_y in range(-stroke_width, stroke_width + 1):
            draw.text((x + adj_x, y + adj_y), text, font=font, fill=text_color)


def add_text_to_image(
    input_image,
    text,
    font_size,
    color,
    opacity,
    x_position,
    y_position,
    thickness,
    text_position_type,
    font_choice
):
    try:
        if input_image is None or text.strip() == "":
            return input_image

        if not isinstance(input_image, Image.Image):
            if isinstance(input_image, np.ndarray):
                image = Image.fromarray(input_image)
            else:
                raise ValueError("Unsupported image type")
        else:
            image = input_image.copy()

        if image.mode != 'RGBA':
            image = image.convert('RGBA')

        font_files = {
            "Default": "DejaVuSans.ttf",
            "Korean Regular": "ko-Regular.ttf"
        }

        try:
            font_file = font_files.get(font_choice, "DejaVuSans.ttf")
            font = ImageFont.truetype(font_file, int(font_size))
        except Exception as e:
            print(f"Font loading error ({font_choice}): {str(e)}")
            font = ImageFont.load_default()

        color_map = {
            'White': (255, 255, 255),
            'Black': (0, 0, 0),
            'Red': (255, 0, 0),
            'Green': (0, 255, 0),
            'Blue': (0, 0, 255),
            'Yellow': (255, 255, 0),
            'Purple': (128, 0, 128)
        }
        rgb_color = color_map.get(color, (255, 255, 255))

        # Measure the text so the x/y sliders can be treated as percentages
        temp_draw = ImageDraw.Draw(image)
        text_bbox = temp_draw.textbbox((0, 0), text, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        actual_x = int((image.width - text_width) * (x_position / 100))
        actual_y = int((image.height - text_height) * (y_position / 100))

        text_color = (*rgb_color, int(opacity))

        # Render the text on a transparent overlay, then composite
        txt_overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
        draw = ImageDraw.Draw(txt_overlay)

        add_text_with_stroke(
            draw,
            text,
            actual_x,
            actual_y,
            font,
            text_color,
            int(thickness)
        )

        output_image = Image.alpha_composite(image, txt_overlay)
        output_image = output_image.convert('RGB')

        return output_image
    except Exception as e:
        print(f"Error in add_text_to_image: {str(e)}")
        return input_image
```

The rest defines the page CSS, a prompt enhancer that appends anime-style quality tags, and the start of the Gradio `Blocks` UI; the listing breaks off inside the opening `gr.HTML` call:

```python
css = """
footer {display: none}
.main-title {
    text-align: center;
    margin: 1em 0;
    padding: 1.5em;
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    border-radius: 15px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.main-title h1 {
    color: #2196F3;
    font-size: 2.8em;
    margin-bottom: 0.3em;
    font-weight: 700;
}
.main-title p {
    color: #555;
    font-size: 1.3em;
    line-height: 1.4;
}
.container {
    max-width: 1200px;
    margin: auto;
    padding: 20px;
}
.input-panel, .output-panel {
    background: white;
    padding: 1.5em;
    border-radius: 12px;
    box-shadow: 0 2px 8px rgba(0,0,0,0.08);
    margin-bottom: 1em;
}
"""

import requests


def enhance_prompt(prompt: str) -> str:
    """Augment the prompt with anime-style quality modifiers."""
    try:
        # Base quality-boosting tags
        enhancements = [
            "masterpiece, best quality, highly detailed",
            "anime style, animation style",
            "vibrant colors, perfect lighting",
            "professional composition",
            "dynamic pose, expressive features",
            "detailed background, perfect shadows",
            "[trigger]"
        ]

        # Rephrase as an animation-style prompt
        anime_style_prompt = f"an animated {prompt}, detailed anime art style"

        # Build the final prompt
        final_prompt = f"{anime_style_prompt}, {', '.join(enhancements)}"

        print(f"Enhanced prompt: {final_prompt}")
        return final_prompt
    except Exception as e:
        print(f"Prompt enhancement failed: {str(e)}")
        return prompt


# The earlier prompt-enhancer pipeline initialization was removed:
# try:
#     prompt_enhancer = pipeline(...)
# except Exception as e:
#     print(f"Error initializing prompt enhancer: {str(e)}")
#     prompt_enhancer = None

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.HTML("""
        Generate webtoon-style images and add text with various styles and positions.
```
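The `Blocks` layout breaks off above, so the real UI cannot be recovered from this listing. Purely as an illustration of how the handlers could be wired together (every component name, label, and range below is hypothetical, not taken from the original), the remainder presumably does something along these lines:

```python
# Hypothetical wiring sketch; the original layout is truncated above and is not reproduced here.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.HTML(
        "<div class='main-title'><h1>🎨 Webtoon Studio</h1>"
        "<p>Generate webtoon-style images and add text with various styles and positions.</p></div>"
    )
    with gr.Row():
        with gr.Column(elem_classes="input-panel"):
            prompt = gr.Textbox(label="Prompt (Korean or English)")
            enhance_btn = gr.Button("Enhance Prompt")
            seed = gr.Slider(0, MAX_SEED, value=0, step=1, label="Seed")
            randomize_seed = gr.Checkbox(value=True, label="Randomize seed")
            width = gr.Slider(256, MAX_IMAGE_SIZE, value=768, step=64, label="Width")
            height = gr.Slider(256, MAX_IMAGE_SIZE, value=1024, step=64, label="Height")
            guidance_scale = gr.Slider(1.0, 10.0, value=3.5, label="Guidance scale")
            steps = gr.Slider(1, 50, value=28, step=1, label="Inference steps")
            generate_btn = gr.Button("Generate")
        with gr.Column(elem_classes="output-panel"):
            result = gr.Image(label="Result", type="pil")
            used_seed = gr.Number(label="Seed used")

    enhance_btn.click(enhance_prompt, inputs=prompt, outputs=prompt)
    generate_btn.click(
        generate_image,
        inputs=[prompt, seed, randomize_seed, width, height, guidance_scale, steps],
        outputs=[result, used_seed],
    )

demo.launch()
```

`add_text_to_image` would be wired the same way, with ten inputs matching its parameter order and an image output.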
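For a quick check outside the UI, the two handlers can also be composed directly. This is a rough local sketch, assuming the models above loaded successfully; the prompt, caption, and numeric values are illustrative rather than defaults from the app, and calling `generate_image` this way assumes the `@spaces.GPU` decorator is a no-op in your environment:

```python
# Generate a panel from a Korean prompt, then overlay a caption (all values illustrative).
panel, seed_used = generate_image(
    prompt="마법의 숲을 걷는 소녀",  # "a girl walking through a magical forest"; translated internally
    seed=0,
    randomize_seed=True,
    width=768,
    height=1024,
    guidance_scale=3.5,
    num_inference_steps=28,
)

captioned = add_text_to_image(
    panel,
    text="Episode 1",
    font_size=48,
    color="White",
    opacity=255,
    x_position=50,            # percent of the horizontal free space
    y_position=90,            # percent of the vertical free space
    thickness=2,
    text_position_type=None,  # accepted but unused in the visible code
    font_choice="Default",
)
captioned.save("panel_with_caption.png")
```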