# Every-Text / app.py
import os
import re
import time
from os import path
import tempfile
import io
import random
import string
import torch
from PIL import Image
from transformers import pipeline
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download
import gradio as gr
from diffusers import FluxPipeline
# (Internal) text-modification library
from google import genai
from google.genai import types
#######################################
# 0. Environment & Translation Pipeline
#######################################
BASE_DIR = path.dirname(path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
CACHE_PATH = path.join(BASE_DIR, "models")
os.environ["TRANSFORMERS_CACHE"] = CACHE_PATH
os.environ["HF_HUB_CACHE"] = CACHE_PATH
os.environ["HF_HOME"] = CACHE_PATH
# Translation (Korean -> English), CPU only
translator = pipeline(
task="translation",
model="Helsinki-NLP/opus-mt-ko-en",
device=-1 # force CPU
)
def maybe_translate_to_english(text: str) -> str:
"""
If the prompt contains any Korean characters, translate to English.
Otherwise, return as-is.
"""
    if re.search("[가-힣]", text):
translated = translator(text)[0]["translation_text"]
print(f"[TRANSLATE] Detected Korean -> '{text}' -> '{translated}'")
return translated
return text
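# Illustrative usage (a sketch; actual wording depends on the opus-mt-ko-en model):
#   maybe_translate_to_english("안녕하세요")  # -> an English rendering such as "Hello"
#   maybe_translate_to_english("Hello")      # -> "Hello" (returned unchanged)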
# Simple Timer Class
class timer:
def __init__(self, method_name="timed process"):
self.method = method_name
def __enter__(self):
self.start = time.time()
print(f"[TIMER] {self.method} starts")
def __exit__(self, exc_type, exc_val, exc_tb):
end = time.time()
print(f"[TIMER] {self.method} took {round(end - self.start, 2)}s")
#######################################
# 1. Load FLUX Pipeline
#######################################
if not path.exists(CACHE_PATH):
os.makedirs(CACHE_PATH, exist_ok=True)
pipe = FluxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev",
torch_dtype=torch.bfloat16
)
# Download the example Hyper-SD LoRA and fuse it into the pipeline
lora_path = hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors")
pipe.load_lora_weights(lora_path)
pipe.fuse_lora(lora_scale=0.125)
pipe.to(device="cuda", dtype=torch.bfloat16)
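# Sanity-check sketch (illustrative only; avoid running extra generations at import
# time on a Space). The fused 8-step Hyper-SD LoRA is what makes ~8 steps sufficient:
#   image = pipe("a red apple on a table", num_inference_steps=8,
#                guidance_scale=3.5, height=512, width=512).images[0]
#   image.save("smoke_test.png")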
#######################################
# 2. Internal Text Modification Functions
#######################################
def save_binary_file(file_name, data):
with open(file_name, "wb") as f:
f.write(data)
def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
"""
    - Sends the image plus an extra instruction (AIP) to perform image-based editing.
    - If the response is an image, save it; if it is text, accumulate and return it.
"""
    # Keep the existing API-key logic (uses the GAPI_TOKEN environment variable)
api_key = os.getenv("GAPI_TOKEN", None)
if not api_key:
raise ValueError("GAPI_TOKEN is missing. Please set an API key.")
client = genai.Client(api_key=api_key)
files = [client.files.upload(file=file_name)]
contents = [
types.Content(
role="user",
parts=[
types.Part.from_uri(
file_uri=files[0].uri,
mime_type=files[0].mime_type,
),
types.Part.from_text(text=text),
],
),
]
generate_content_config = types.GenerateContentConfig(
temperature=1,
top_p=0.95,
top_k=40,
max_output_tokens=8192,
response_modalities=["image", "text"],
response_mime_type="text/plain",
)
text_response = ""
image_path = None
    # Prepare a temp file so any returned image can be saved
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
temp_path = tmp.name
for chunk in client.models.generate_content_stream(
model=model,
contents=contents,
config=generate_content_config,
):
if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
continue
candidate = chunk.candidates[0].content.parts[0]
        # If inline_data (image bytes) is present -> this is the edited-image result
if candidate.inline_data:
save_binary_file(temp_path, candidate.inline_data.data)
print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path}")
image_path = temp_path
            # Stop once a single image has been obtained
break
else:
            # No inline_data means this chunk is text, so accumulate it
text_response += chunk.text + "\n"
del files
return image_path, text_response
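# Illustrative call (requires GAPI_TOKEN and an existing image file; the file name
# below is an example, not a fixed path):
#   img_path, text = generate_by_google_genai(
#       text="Change the sign to read 'OPEN'",
#       file_name="input.png",
#   )
#   # img_path is a temp PNG path if the stream contained image data, else None;
#   # text accumulates any text-only chunks.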
#######################################
# 3. Diffusion Utility
#######################################
def generate_random_letters(length: int) -> str:
"""
Create a random sequence of uppercase/lowercase letters of given length.
"""
letters = string.ascii_lowercase + string.ascii_uppercase
return "".join(random.choice(letters) for _ in range(length))
def is_all_english(text: str) -> bool:
"""
Check if text consists only of English letters (a-z, A-Z), digits, spaces,
and basic punctuation. If so, return True; otherwise False.
"""
    return bool(re.match(r'^[a-zA-Z0-9\s\.,!\?\']*$', text))
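# Examples:
#   is_all_english("Hello, world!")  # -> True
#   is_all_english("안녕하세요")      # -> False (non-ASCII letters)
#   is_all_english("50% off")        # -> False ('%' is outside the allowed set)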
def maybe_use_random_or_original(final_text: str) -> str:
"""
If final_text is strictly English/allowed chars, use it as-is.
Else replace with random letters of the same length.
"""
if not final_text:
return ""
if is_all_english(final_text):
return final_text
else:
return generate_random_letters(len(final_text))
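# Examples:
#   maybe_use_random_or_original("OPEN")  # -> "OPEN"
#   maybe_use_random_or_original("안녕")   # -> e.g. "Qx" (same length, random letters)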
def fill_prompt_with_random_texts(prompt: str, r1: str, r2: str, r3: str) -> str:
"""
    Replace <text1>, <text2>, <text3> placeholders with r1, r2, r3; if <text1>
    is absent, append a readable-text clause for r1 instead.
"""
if "<text1>" in prompt:
prompt = prompt.replace("<text1>", r1)
else:
prompt = f"{prompt} with clear readable text that says '{r1}'"
if "<text2>" in prompt:
prompt = prompt.replace("<text2>", r2)
if "<text3>" in prompt:
prompt = prompt.replace("<text3>", r3)
return prompt
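# Examples:
#   fill_prompt_with_random_texts("A sign showing <text1>", "OPEN", "", "")
#   # -> "A sign showing OPEN"
#   fill_prompt_with_random_texts("A plain sign", "OPEN", "", "")
#   # -> "A plain sign with clear readable text that says 'OPEN'"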
def generate_initial_image(prompt, height, width, steps, scale, seed):
"""
Use Flux Pipeline to generate the initial image from the prompt.
"""
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("Flux Generation"):
result = pipe(
prompt=[prompt],
generator=torch.Generator().manual_seed(int(seed)),
num_inference_steps=int(steps),
guidance_scale=float(scale),
height=int(height),
width=int(width),
max_sequence_length=256
).images[0]
return result
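# Illustrative call (requires the CUDA pipeline loaded above):
#   img = generate_initial_image("A neon sign reading OPEN", 512, 512, 8, 3.5, 1234)
#   # -> a 512x512 PIL.Image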
#######################################
# 4. Creating 2 Final Images
#######################################
def change_text_in_image_two_times(original_image, instruction):
"""
Call the text-modification API twice, returning 2 final variations.
"""
results = []
for version_tag in ["(A)", "(B)"]:
mod_instruction = f"{instruction} {version_tag}"
try:
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
original_path = tmp.name
original_image.save(original_path)
image_path, text_response = generate_by_google_genai(
text=mod_instruction,
file_name=original_path
)
if image_path:
with open(image_path, "rb") as f:
image_data = f.read()
new_img = Image.open(io.BytesIO(image_data))
results.append(new_img)
else:
                # If no image came back and only text was returned
print("[WARNING] No image returned. text_response=", text_response)
results.append(original_image)
except Exception as e:
raise gr.Error(f"Error: {e}")
return results
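# Illustrative call (requires GAPI_TOKEN; falls back to the original image for any
# variation that returned no image data):
#   img_a, img_b = change_text_in_image_two_times(pil_image, "Change 'Qx' to 'HI'")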
#######################################
# 5. Main Process (Generation from Prompt)
#######################################
def run_process(
prompt,
final_text1,
final_text2,
final_text3,
height,
width,
steps,
scale,
seed
):
"""
1) Translate prompt if Korean -> English
2) For each text, if not English -> random
3) Generate initial image
4) Replace placeholders with real text via API (2 variations)
"""
# 1) Translate prompt if needed
prompt_en = maybe_translate_to_english(prompt)
# 2) Decide placeholders
r1 = maybe_use_random_or_original(final_text1)
r2 = maybe_use_random_or_original(final_text2)
r3 = maybe_use_random_or_original(final_text3)
print(f"[DEBUG] Using placeholders: r1='{r1}', r2='{r2}', r3='{r3}'")
# 3) Fill placeholders in prompt
final_prompt = fill_prompt_with_random_texts(prompt_en, r1, r2, r3)
print(f"[DEBUG] final_prompt = {final_prompt}")
# 4) Generate initial "random/original" image
_random_image = generate_initial_image(final_prompt, height, width, steps, scale, seed)
# Build final instructions (replace placeholders -> real text)
instructions = []
if r1 and final_text1:
instructions.append(f"Change any text reading '{r1}' in this image to '{final_text1}'.")
if r2 and final_text2:
instructions.append(f"Change any text reading '{r2}' in this image to '{final_text2}'.")
if r3 and final_text3:
instructions.append(f"Change any text reading '{r3}' in this image to '{final_text3}'.")
instruction = " ".join(instructions) if instructions else "No text changes needed."
# Call 2 variations
final_imgs = change_text_in_image_two_times(_random_image, instruction)
return [final_imgs[0], final_imgs[1]]
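# Illustrative end-to-end call (mirrors the "Generate from Prompt" tab; the argument
# values below are examples):
#   img1, img2 = run_process("Futuristic neon sign with <text1>", "OPEN", "", "",
#                            512, 512, 8, 3.5, 1234)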
#######################################
# 5-2. Process for Editing Uploaded Image
#######################################
def run_edit_process(input_image, edit_prompt, final_text1):
"""
1) If final_text1 is empty => skip text replacement
2) Otherwise, combine edit_prompt + text-change instructions
3) Call 2 times for final images
"""
r1 = maybe_use_random_or_original(final_text1)
print(f"[DEBUG] Editing image with placeholder r1='{r1}'")
    # Key fix:
    # if final_text1 is empty, skip the text-replacement step entirely;
    # otherwise append a "Change any text reading '{r1}' to '{final_text1}'" instruction.
if not final_text1.strip():
instruction = f"{edit_prompt}"
else:
instruction = f"{edit_prompt}\nChange any text reading '{r1}' in this image to '{final_text1}'."
final_imgs = change_text_in_image_two_times(input_image, instruction)
return [final_imgs[0], final_imgs[1]]
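# Illustrative call (mirrors the "Edit Uploaded Image" tab; `uploaded_pil` stands in
# for any PIL image):
#   img1, img2 = run_edit_process(uploaded_pil, "Make the background black", "HELLO")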
#######################################
# 6. Gradio UI with Two Tabs
#######################################
with gr.Blocks(title="Every Text Imaginator: FLUX") as demo:
gr.Markdown(
"""
<style>
/* Set a gradient background for the entire page */
body {
background: linear-gradient(to right, #ffecd2, #fcb69f);
margin: 0;
padding: 0;
}
.gradio-container {
font-family: "Trebuchet MS", sans-serif;
color: #333;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
h2 {
color: #4CAF50;
}
p, label {
color: #5c6bc0;
}
.gr-button {
background-color: #fff176 !important;
color: #000 !important;
border: none !important;
margin-top: 10px !important;
}
.gr-button:hover {
background-color: #ffe100 !important;
}
.gr-examples > .label {
color: #d500f9;
}
</style>
<h2 style="text-align:center; margin-bottom: 15px;">
        <strong>Every Text Imaginator: FLUX</strong>
</h2>
<p style="text-align:center;">
This tool generates <b>two final images</b> from a prompt
or an uploaded image, optionally containing placeholders
<code>&lt;text1&gt;</code>, <code>&lt;text2&gt;</code>, <code>&lt;text3&gt;</code>.
</p>
<hr style="margin: 15px 0;">
"""
)
with gr.Tabs():
###############################################
# Tab 1) Generate from Prompt
###############################################
with gr.TabItem("Generate from Prompt"):
with gr.Row():
with gr.Column():
with gr.Group():
prompt_input = gr.Textbox(
lines=3,
label="Prompt (Korean or English)",
placeholder="On a grand stage, <text1> in big letters..."
)
final_text1 = gr.Textbox(
label="New Text #1 (Required)",
placeholder="Example: HELLO or ์•ˆ๋…•ํ•˜์„ธ์š”"
)
final_text2 = gr.Textbox(
label="New Text #2 (Optional)",
placeholder="Example: WORLD or ๋ฐ˜๊ฐ‘์Šต๋‹ˆ๋‹ค"
)
final_text3 = gr.Textbox(
label="New Text #3 (Optional)",
placeholder="(Leave blank if not used)"
)
with gr.Accordion("Advanced Settings (optional)", open=False):
height = gr.Slider(
label="Height",
minimum=256,
maximum=1152,
step=64,
value=512
)
width = gr.Slider(
label="Width",
minimum=256,
maximum=1152,
step=64,
value=512
)
steps = gr.Slider(
label="Inference Steps",
minimum=6,
maximum=25,
step=1,
value=8
)
scale = gr.Slider(
label="Guidance Scale",
minimum=0.0,
maximum=10.0,
step=0.5,
value=3.5
)
seed = gr.Number(
label="Seed",
value=1234,
precision=0
)
run_btn = gr.Button("Generate 2 Final Images", variant="primary")
gr.Examples(
examples=[
[
"Futuristic neon sign with <text1>, plus near the bottom",
"OPEN", "", ""
],
[
"On a grand stage, <text1> in big letters and on the left side",
"ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค.", "", ""
],
[
"A classical poster reading <text1> in bold, as a subtitle",
"้”™่ง‰", "", ""
],
[
"In a cartoon style, a speech bubble with <text1> and another text",
"์•ˆ๋…•", "", ""
],
[
"Large billboard featuring <text1>",
"์•„๋ฆ„๋‹ค์šด ๋‹น์‹ ", "", ""
],
[
"์ฌ๊ธ€๋ผ์Šค ์ฐฉ์šฉํ•œ ํฐ์ƒ‰ ๊ณ ์–‘์ด์˜ ๋ฐฐ๋„ˆ <text1>",
"์•ˆ๋…•", "", ""
],
],
inputs=[prompt_input, final_text1, final_text2, final_text3],
label="Example Prompts"
)
with gr.Column():
final_image_output1 = gr.Image(
label="Final Image #1",
type="pil"
)
final_image_output2 = gr.Image(
label="Final Image #2",
type="pil"
)
            # Handle the generate-button click
run_btn.click(
fn=run_process,
inputs=[
prompt_input,
final_text1,
final_text2,
final_text3,
height,
width,
steps,
scale,
seed
],
outputs=[final_image_output1, final_image_output2]
)
###############################################
# Tab 2) Edit Uploaded Image
###############################################
with gr.TabItem("Edit Uploaded Image"):
with gr.Row():
with gr.Column():
                    # source="upload" removed for compatibility across Gradio versions
uploaded_image = gr.Image(
label="Upload Image for Editing",
type="pil"
)
edit_prompt = gr.Textbox(
label="Additional Instruction Prompt",
placeholder="(์˜ˆ: Make the background black, add sparkles, etc.)"
)
final_text1_edit = gr.Textbox(
label="Replace Text",
placeholder="Example: HELLO or ์•ˆ๋…•ํ•˜์„ธ์š”"
)
run_edit_btn = gr.Button("Edit Image", variant="primary")
with gr.Column():
edited_image_output1 = gr.Image(
label="Edited Image #1",
type="pil"
)
edited_image_output2 = gr.Image(
label="Edited Image #2",
type="pil"
)
            # Handle the edit-button click for the uploaded image
run_edit_btn.click(
fn=run_edit_process,
inputs=[uploaded_image, edit_prompt, final_text1_edit],
outputs=[edited_image_output1, edited_image_output2]
)
demo.launch(max_threads=20)