# Every-Text / app.py
import os
import re
import time
from os import path
import tempfile
import io
import random
import string
import torch
from PIL import Image
from transformers import pipeline
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download
import gradio as gr
from diffusers import FluxPipeline
# (Internal) text-modification library
from google import genai
from google.genai import types
#######################################
# 0. Environment & Translation Pipeline
#######################################
BASE_DIR = path.dirname(path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
CACHE_PATH = path.join(BASE_DIR, "models")
os.environ["TRANSFORMERS_CACHE"] = CACHE_PATH
os.environ["HF_HUB_CACHE"] = CACHE_PATH
os.environ["HF_HOME"] = CACHE_PATH
# Translation (Korean -> English), CPU only
translator = pipeline(
task="translation",
model="Helsinki-NLP/opus-mt-ko-en",
device=-1 # force CPU
)
def maybe_translate_to_english(text: str) -> str:
"""
If the prompt contains any Korean characters, translate to English.
Otherwise, return as-is.
"""
    if re.search("[가-힣]", text):
translated = translator(text)[0]["translation_text"]
print(f"[TRANSLATE] Detected Korean -> '{text}' -> '{translated}'")
return translated
return text
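# Illustrative usage (a sketch; actual wording depends on the opus-mt-ko-en model):
#   maybe_translate_to_english("안녕하세요")  # -> an English rendering such as "Hello"
#   maybe_translate_to_english("Hello")      # -> "Hello" (returned unchanged)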
# Simple Timer Class
class timer:
def __init__(self, method_name="timed process"):
self.method = method_name
def __enter__(self):
self.start = time.time()
print(f"[TIMER] {self.method} starts")
def __exit__(self, exc_type, exc_val, exc_tb):
end = time.time()
print(f"[TIMER] {self.method} took {round(end - self.start, 2)}s")
#######################################
# 1. Load FLUX Pipeline
#######################################
if not path.exists(CACHE_PATH):
os.makedirs(CACHE_PATH, exist_ok=True)
pipe = FluxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev",
torch_dtype=torch.bfloat16
)
# Download the example Hyper-SD LoRA and fuse it into the pipeline
lora_path = hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors")
pipe.load_lora_weights(lora_path)
pipe.fuse_lora(lora_scale=0.125)
pipe.to(device="cuda", dtype=torch.bfloat16)
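# Sanity-check sketch (illustrative only; avoid running extra generations at import
# time on a Space). The fused 8-step Hyper-SD LoRA is what makes ~8 steps sufficient:
#   image = pipe("a red apple on a table", num_inference_steps=8,
#                guidance_scale=3.5, height=512, width=512).images[0]
#   image.save("smoke_test.png")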
#######################################
# 2. Internal Text Modification Functions
#######################################
def save_binary_file(file_name, data):
with open(file_name, "wb") as f:
f.write(data)
def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
"""
    - Sends the image plus an extra instruction (AIP) to perform image-based editing.
    - If the response is an image, save it; if it is text, accumulate and return it.
"""
    # Keep the existing API-key logic (uses the GAPI_TOKEN environment variable)
api_key = os.getenv("GAPI_TOKEN", None)
if not api_key:
raise ValueError("GAPI_TOKEN is missing. Please set an API key.")
client = genai.Client(api_key=api_key)
files = [client.files.upload(file=file_name)]
contents = [
types.Content(
role="user",
parts=[
types.Part.from_uri(
file_uri=files[0].uri,
mime_type=files[0].mime_type,
),
types.Part.from_text(text=text),
],
),
]
generate_content_config = types.GenerateContentConfig(
temperature=1,
top_p=0.95,
top_k=40,
max_output_tokens=8192,
response_modalities=["image", "text"],
response_mime_type="text/plain",
)
text_response = ""
image_path = None
    # Prepare a temp file so any returned image can be saved
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
temp_path = tmp.name
for chunk in client.models.generate_content_stream(
model=model,
contents=contents,
config=generate_content_config,
):
if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
continue
candidate = chunk.candidates[0].content.parts[0]
        # If inline_data (image bytes) is present -> this is the edited-image result
if candidate.inline_data:
save_binary_file(temp_path, candidate.inline_data.data)
print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path}")
image_path = temp_path
            # Stop once a single image has been obtained
break
else:
            # No inline_data means this chunk is text, so accumulate it
text_response += chunk.text + "\n"
del files
return image_path, text_response
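# Illustrative call (requires GAPI_TOKEN and an existing image file; the file name
# below is an example, not a fixed path):
#   img_path, text = generate_by_google_genai(
#       text="Change the sign to read 'OPEN'",
#       file_name="input.png",
#   )
#   # img_path is a temp PNG path if the stream contained image data, else None;
#   # text accumulates any text-only chunks.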
#######################################
# 3. Diffusion Utility
#######################################
def generate_random_letters(length: int) -> str:
"""
Create a random sequence of uppercase/lowercase letters of given length.
"""
letters = string.ascii_lowercase + string.ascii_uppercase
return "".join(random.choice(letters) for _ in range(length))
def is_all_english(text: str) -> bool:
"""
Check if text consists only of English letters (a-z, A-Z), digits, spaces,
and basic punctuation. If so, return True; otherwise False.
"""
    return bool(re.match(r'^[a-zA-Z0-9\s\.,!\?\']*$', text))
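# Examples:
#   is_all_english("Hello, world!")  # -> True
#   is_all_english("안녕하세요")      # -> False (non-ASCII letters)
#   is_all_english("50% off")        # -> False ('%' is outside the allowed set)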
def maybe_use_random_or_original(final_text: str) -> str:
"""
If final_text is strictly English/allowed chars, use it as-is.
Else replace with random letters of the same length.
"""
if not final_text:
return ""
if is_all_english(final_text):
return final_text
else:
return generate_random_letters(len(final_text))
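# Examples:
#   maybe_use_random_or_original("OPEN")  # -> "OPEN"
#   maybe_use_random_or_original("안녕")   # -> e.g. "Qx" (same length, random letters)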
def fill_prompt_with_random_texts(prompt: str, r1: str, r2: str, r3: str) -> str:
"""
    Replace <text1>, <text2>, <text3> placeholders with r1, r2, r3; if <text1>
    is absent, append a readable-text clause for r1 instead.
"""
if "<text1>" in prompt:
prompt = prompt.replace("<text1>", r1)
else:
prompt = f"{prompt} with clear readable text that says '{r1}'"
if "<text2>" in prompt:
prompt = prompt.replace("<text2>", r2)
if "<text3>" in prompt:
prompt = prompt.replace("<text3>", r3)
return prompt
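# Examples:
#   fill_prompt_with_random_texts("A sign showing <text1>", "OPEN", "", "")
#   # -> "A sign showing OPEN"
#   fill_prompt_with_random_texts("A plain sign", "OPEN", "", "")
#   # -> "A plain sign with clear readable text that says 'OPEN'"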
def generate_initial_image(prompt, height, width, steps, scale, seed):
"""
Use Flux Pipeline to generate the initial image from the prompt.
"""
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("Flux Generation"):
result = pipe(
prompt=[prompt],
generator=torch.Generator().manual_seed(int(seed)),
num_inference_steps=int(steps),
guidance_scale=float(scale),
height=int(height),
width=int(width),
max_sequence_length=256
).images[0]
return result
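# Illustrative call (requires the CUDA pipeline loaded above):
#   img = generate_initial_image("A neon sign reading OPEN", 512, 512, 8, 3.5, 1234)
#   # -> a 512x512 PIL.Image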
#######################################
# 4. Creating 2 Final Images
#######################################
def change_text_in_image_two_times(original_image, instruction):
"""
Call the text-modification API twice, returning 2 final variations.
"""
results = []
for version_tag in ["(A)", "(B)"]:
mod_instruction = f"{instruction} {version_tag}"
try:
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
original_path = tmp.name
original_image.save(original_path)
image_path, text_response = generate_by_google_genai(
text=mod_instruction,
file_name=original_path
)
if image_path:
with open(image_path, "rb") as f:
image_data = f.read()
new_img = Image.open(io.BytesIO(image_data))
results.append(new_img)
else:
                # If no image came back and only text was returned
print("[WARNING] No image returned. text_response=", text_response)
results.append(original_image)
except Exception as e:
raise gr.Error(f"Error: {e}")
return results
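# Illustrative call (requires GAPI_TOKEN; falls back to the original image for any
# variation that returned no image data):
#   img_a, img_b = change_text_in_image_two_times(pil_image, "Change 'Qx' to 'HI'")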
#######################################
# 5. Main Process (Generation from Prompt)
#######################################
def run_process(
prompt,
final_text1,
final_text2,
final_text3,
height,
width,
steps,
scale,
seed
):
"""
1) Translate prompt if Korean -> English
2) For each text, if not English -> random
3) Generate initial image
4) Replace placeholders with real text via API (2 variations)
"""
# 1) Translate prompt if needed
prompt_en = maybe_translate_to_english(prompt)
# 2) Decide placeholders
r1 = maybe_use_random_or_original(final_text1)
r2 = maybe_use_random_or_original(final_text2)
r3 = maybe_use_random_or_original(final_text3)
print(f"[DEBUG] Using placeholders: r1='{r1}', r2='{r2}', r3='{r3}'")
# 3) Fill placeholders in prompt
final_prompt = fill_prompt_with_random_texts(prompt_en, r1, r2, r3)
print(f"[DEBUG] final_prompt = {final_prompt}")
# 4) Generate initial "random/original" image
_random_image = generate_initial_image(final_prompt, height, width, steps, scale, seed)
# Build final instructions (replace placeholders -> real text)
instructions = []
if r1 and final_text1:
instructions.append(f"Change any text reading '{r1}' in this image to '{final_text1}'.")
if r2 and final_text2:
instructions.append(f"Change any text reading '{r2}' in this image to '{final_text2}'.")
if r3 and final_text3:
instructions.append(f"Change any text reading '{r3}' in this image to '{final_text3}'.")
instruction = " ".join(instructions) if instructions else "No text changes needed."
# Call 2 variations
final_imgs = change_text_in_image_two_times(_random_image, instruction)
return [final_imgs[0], final_imgs[1]]
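# Illustrative end-to-end call (mirrors the "Generate from Prompt" tab; the argument
# values below are examples):
#   img1, img2 = run_process("Futuristic neon sign with <text1>", "OPEN", "", "",
#                            512, 512, 8, 3.5, 1234)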
#######################################
# 5-2. Process for Editing Uploaded Image
#######################################
def run_edit_process(input_image, edit_prompt, final_text1):
"""
1) If final_text1 is empty => skip text replacement
2) Otherwise, combine edit_prompt + text-change instructions
3) Call 2 times for final images
"""
r1 = maybe_use_random_or_original(final_text1)
print(f"[DEBUG] Editing image with placeholder r1='{r1}'")
    # Key fix:
    # if final_text1 is empty, skip the text-replacement step entirely;
    # otherwise append a "Change any text reading '{r1}' to '{final_text1}'" instruction.
if not final_text1.strip():
instruction = f"{edit_prompt}"
else:
instruction = f"{edit_prompt}\nChange any text reading '{r1}' in this image to '{final_text1}'."
final_imgs = change_text_in_image_two_times(input_image, instruction)
return [final_imgs[0], final_imgs[1]]
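# Illustrative call (mirrors the "Edit Uploaded Image" tab; `uploaded_pil` stands in
# for any PIL image):
#   img1, img2 = run_edit_process(uploaded_pil, "Make the background black", "HELLO")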
#######################################
# 6. Gradio UI with Two Tabs
#######################################
with gr.Blocks(title="Every Text Imaginator: FLUX") as demo:
gr.Markdown(
"""
<style>
/* Set a gradient background for the entire page */
body {
background: linear-gradient(to right, #ffecd2, #fcb69f);
margin: 0;
padding: 0;
}
.gradio-container {
font-family: "Trebuchet MS", sans-serif;
color: #333;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
h2 {
color: #4CAF50;
}
p, label {
color: #5c6bc0;
}
.gr-button {
background-color: #fff176 !important;
color: #000 !important;
border: none !important;
margin-top: 10px !important;
}
.gr-button:hover {
background-color: #ffe100 !important;
}
.gr-examples > .label {
color: #d500f9;
}
</style>
<h2 style="text-align:center; margin-bottom: 15px;">
        <strong>Every Text Imaginator: FLUX</strong>
</h2>
<p style="text-align:center;">
This tool generates <b>two final images</b> from a prompt
or an uploaded image, optionally containing placeholders
<code>&lt;text1&gt;</code>, <code>&lt;text2&gt;</code>, <code>&lt;text3&gt;</code>.
</p>
<hr style="margin: 15px 0;">
"""
)
with gr.Tabs():
###############################################
# Tab 1) Generate from Prompt
###############################################
with gr.TabItem("Generate from Prompt"):
with gr.Row():
with gr.Column():
with gr.Group():
prompt_input = gr.Textbox(
lines=3,
label="Prompt (Korean or English)",
placeholder="On a grand stage, <text1> in big letters..."
)
final_text1 = gr.Textbox(
label="New Text #1 (Required)",
placeholder="Example: HELLO or ์•ˆ๋…•ํ•˜์„ธ์š”"
)
final_text2 = gr.Textbox(
label="New Text #2 (Optional)",
placeholder="Example: WORLD or ๋ฐ˜๊ฐ‘์Šต๋‹ˆ๋‹ค"
)
final_text3 = gr.Textbox(
label="New Text #3 (Optional)",
placeholder="(Leave blank if not used)"
)
with gr.Accordion("Advanced Settings (optional)", open=False):
height = gr.Slider(
label="Height",
minimum=256,
maximum=1152,
step=64,
value=512
)
width = gr.Slider(
label="Width",
minimum=256,
maximum=1152,
step=64,
value=512
)
steps = gr.Slider(
label="Inference Steps",
minimum=6,
maximum=25,
step=1,
value=8
)
scale = gr.Slider(
label="Guidance Scale",
minimum=0.0,
maximum=10.0,
step=0.5,
value=3.5
)
seed = gr.Number(
label="Seed",
value=1234,
precision=0
)
run_btn = gr.Button("Generate 2 Final Images", variant="primary")
gr.Examples(
examples=[
[
"Futuristic neon sign with <text1>, plus near the bottom",
"OPEN", "", ""
],
[
"On a grand stage, <text1> in big letters and on the left side",
"ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค.", "", ""
],
[
"A classical poster reading <text1> in bold, as a subtitle",
"้”™่ง‰", "", ""
],
[
"In a cartoon style, a speech bubble with <text1> and another text",
"์•ˆ๋…•", "", ""
],
[
"Large billboard featuring <text1>",
"์•„๋ฆ„๋‹ค์šด ๋‹น์‹ ", "", ""
],
[
"์ฌ๊ธ€๋ผ์Šค ์ฐฉ์šฉํ•œ ํฐ์ƒ‰ ๊ณ ์–‘์ด์˜ ๋ฐฐ๋„ˆ <text1>",
"์•ˆ๋…•", "", ""
],
],
inputs=[prompt_input, final_text1, final_text2, final_text3],
label="Example Prompts"
)
with gr.Column():
final_image_output1 = gr.Image(
label="Final Image #1",
type="pil"
)
final_image_output2 = gr.Image(
label="Final Image #2",
type="pil"
)
            # Handle the generate-button click
run_btn.click(
fn=run_process,
inputs=[
prompt_input,
final_text1,
final_text2,
final_text3,
height,
width,
steps,
scale,
seed
],
outputs=[final_image_output1, final_image_output2]
)
###############################################
# Tab 2) Edit Uploaded Image
###############################################
with gr.TabItem("Edit Uploaded Image"):
with gr.Row():
with gr.Column():
                    # source="upload" removed for compatibility across Gradio versions
uploaded_image = gr.Image(
label="Upload Image for Editing",
type="pil"
)
edit_prompt = gr.Textbox(
label="Additional Instruction Prompt",
placeholder="(์˜ˆ: Make the background black, add sparkles, etc.)"
)
final_text1_edit = gr.Textbox(
label="Replace Text",
placeholder="Example: HELLO or ์•ˆ๋…•ํ•˜์„ธ์š”"
)
run_edit_btn = gr.Button("Edit Image", variant="primary")
with gr.Column():
edited_image_output1 = gr.Image(
label="Edited Image #1",
type="pil"
)
edited_image_output2 = gr.Image(
label="Edited Image #2",
type="pil"
)
            # Handle the edit-button click for the uploaded image
run_edit_btn.click(
fn=run_edit_process,
inputs=[uploaded_image, edit_prompt, final_text1_edit],
outputs=[edited_image_output1, edited_image_output2]
)
demo.launch(max_threads=20)