Spaces:
Running
on
Zero
Running
on
Zero
feat: Enable MCP
Browse files
Hello! This is an automated PR adding MCP compatibility to your AI App 🤖.
This PR introduces two improvements:
1. Adds docstrings to the functions in the app file that are directly connected to the Gradio UI, for the downstream LLM to use.
2. Enables the Model Context Protocol (MCP) server by adding `mcp_server=True` to the `.launch()` call.
No other logic has been changed. Please review and merge if it looks good!
Learn more about MCP compatibility in Spaces here: https://huggingface.co/changelog/add-compatible-spaces-to-your-mcp-tools
app.py
CHANGED
@@ -1,202 +1,222 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
import
|
8 |
-
from
|
9 |
-
import
|
10 |
-
from
|
11 |
-
|
12 |
-
import
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
gr.
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
from datetime import datetime
|
4 |
+
import torch
|
5 |
+
from PIL import Image
|
6 |
+
|
7 |
+
import spaces
|
8 |
+
from huggingface_hub import snapshot_download
|
9 |
+
from pipeline_calligrapher import CalligrapherPipeline
|
10 |
+
from models.calligrapher import Calligrapher
|
11 |
+
from models.transformer_flux_inpainting import FluxTransformer2DModel
|
12 |
+
from utils import process_gradio_source, get_bbox_from_mask, crop_image_from_bb, \
|
13 |
+
resize_img_and_pad, generate_context_reference_image
|
14 |
+
|
15 |
+
|
16 |
+
# Function of loading pre-trained models.
|
17 |
+
def load_models():
    """Download the Calligrapher checkpoint and build the generation model.

    Fetches ``calligrapher.bin`` from the ``Calligrapher2025/Calligrapher``
    hub repository into the current directory, loads the FLUX.1-Fill-dev
    transformer and pipeline in bfloat16, moves the pipeline to CUDA, and
    wraps it in a ``Calligrapher`` instance.

    Returns:
        The ``Calligrapher`` object constructed around the CUDA pipeline.
    """
    base_repo = "black-forest-labs/FLUX.1-Fill-dev"
    checkpoint_name = "calligrapher.bin"

    # Restrict the snapshot to the single checkpoint file that is used below.
    snapshot_download(
        repo_id="Calligrapher2025/Calligrapher",
        allow_patterns=checkpoint_name,
        local_dir="./",
    )
    print("calligrapher.bin successfully downloaded!")

    flux_transformer = FluxTransformer2DModel.from_pretrained(
        base_repo,
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
    )
    pipeline = CalligrapherPipeline.from_pretrained(
        base_repo,
        transformer=flux_transformer,
        torch_dtype=torch.bfloat16,
    )
    pipeline = pipeline.to("cuda")

    return Calligrapher(
        pipeline,
        image_encoder_path="google/siglip-so400m-patch14-384",
        calligrapher_path=checkpoint_name,
        device="cuda",
        num_tokens=128,
    )
|
35 |
+
|
36 |
+
|
37 |
+
# Build the model once at import time; process_and_generate() reads this
# module-level ``model`` on every request.
model = load_models()
print("Model loaded!")
|
40 |
+
|
41 |
+
|
42 |
+
@spaces.GPU()
def process_and_generate(editor_component, reference_image, prompt, height, width,
                         scale, steps=50, seed=42, use_context=True, num_images=1):
    """
    Process input images and generate customized text images using the Calligrapher model.

    This function handles the complete pipeline from processing user inputs through the image editor,
    preparing reference images, applying masks, and generating multiple customized text images
    based on the provided parameters.

    Args:
        editor_component: Value of the Gradio ImageEditor holding the source image and mask drawings.
        reference_image: PIL Image object of the reference style image, or None for self-reference
            (the reference is then cropped from the source image using the mask's bounding box).
        prompt: String describing the desired text content.
        height: Integer height of the output image in pixels.
        width: Integer width of the output image in pixels.
        scale: Float value controlling the generation strength (0.0 to 2.0).
        steps: Integer number of inference steps for the generation process (default: 50).
        seed: Integer random seed for reproducible generation (default: 42). Image ``i`` uses ``seed + i``.
        use_context: Boolean flag to include context reference in generation (default: True).
        num_images: Integer number of images to generate (default: 1).

    Returns:
        Tuple containing:
            - mask_vis: PIL Image of the resized, binarized mask (without the context padding).
            - reference_image_to_encoder: PIL Image of the resized reference image used by the encoder.
            - all_generated_images: List of tuples, each containing (generated_image, caption_string).
    """
    print('Begin processing!')

    # Numeric UI/MCP inputs may arrive as floats; sizes, step counts, seeds
    # and loop bounds must be integers for resize() and range().
    height, width = int(height), int(width)
    steps, seed, num_images = int(steps), int(seed), int(num_images)

    # Get source and mask images from gr.ImageEditor (the cropped image is not needed here).
    source_image, mask_image, _ = process_gradio_source(editor_component)

    # Resize source and mask; NEAREST keeps the mask free of interpolated grey values.
    source_image = source_image.resize((width, height))
    mask_image = mask_image.resize((width, height), Image.NEAREST)
    # Binarize: every non-zero pixel becomes fully masked (255).
    mask_np = np.array(mask_image)
    mask_np[mask_np > 0] = 255
    mask_image = Image.fromarray(mask_np.astype(np.uint8))

    if reference_image is None:
        # If self-inpaint (no input ref): (1) get bounding box from the mask and
        # (2) perform cropping to get the ref image.
        tl, br = get_bbox_from_mask(mask_image)
        # Convert irregularly shaped masks into rectangles.
        reference_image = crop_image_from_bb(source_image, tl, br)
    # Raw reference image before resizing is kept for the context strip below.
    reference_image_to_encoder = resize_img_and_pad(reference_image, target_size=(512, 512))

    # The mask shown to the user is the mask without any context padding. It does
    # not depend on the generation loop, so it is fixed here and stays defined
    # even when no images are generated.
    mask_vis = mask_image

    context_height = 0
    if use_context:
        reference_context = generate_context_reference_image(reference_image, width)
        context_height = reference_context.size[1]

        # Concat the context on the top of the input masked image in the pixel space.
        source_with_context = Image.new(source_image.mode, (width, context_height + height))
        source_with_context.paste(reference_context, (0, 0))
        source_with_context.paste(source_image, (0, context_height))

        # Concat the zero mask on the top of the mask image. The padded height is
        # context height + mask HEIGHT (size[1]) so it matches the padded source
        # for non-square outputs as well.
        mask_with_context = Image.new(
            mask_image.mode,
            (mask_image.size[0], context_height + mask_image.size[1]),
            color=0,
        )
        mask_with_context.paste(mask_image, (0, context_height))

        source_image = source_with_context
        mask_image = mask_with_context

    all_generated_images = []
    for i in range(num_images):
        res = model.generate(
            image=source_image,
            mask_image=mask_image,
            ref_image=reference_image_to_encoder,
            prompt=prompt,
            scale=scale,
            num_inference_steps=steps,
            width=source_image.size[0],
            height=source_image.size[1],
            seed=seed + i,
        )[0]
        if use_context:
            # Remove the context strip from the top of the generated image.
            res_vis = res.crop((0, context_height, res.width, res.height))
        else:
            res_vis = res
        all_generated_images.append((res_vis, f"Generating {i + 1} (Seed: {seed + i})"))

    return mask_vis, reference_image_to_encoder, all_generated_images
|
128 |
+
|
129 |
+
|
130 |
+
# Main gradio codes.
|
131 |
+
# Gradio UI: a wide editing/result column on the left and a settings column on
# the right, wired to process_and_generate() through the Run button.
# NOTE(review): the nesting below is reconstructed from statement order and
# blank-line grouping — confirm it matches the intended layout.
with gr.Blocks(theme="default", css=".image-editor img {max-width: 70%; height: 70%;}") as demo:
    gr.Markdown(
        """
        # 🖌️ Calligrapher: Freestyle Text Image Customization    [[Code]](https://github.com/Calligrapher2025/Calligrapher) [[Project Page]](https://calligrapher2025.github.io/Calligrapher/)
        ### Consider giving a star to the [project](https://github.com/Calligrapher2025/Calligrapher) if you find it useful!
        """
    )

    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("### 🎨 Image Editing Panel")
            # A single fixed white brush is offered for drawing the mask.
            editor_component = gr.ImageEditor(
                label="Upload or Draw",
                type="pil",
                brush=gr.Brush(colors=["#FFFFFF"], default_size=30, color_mode="fixed"),
                layers=True,
                interactive=True,
            )

            gr.Markdown("### 📤 Output Result")
            gallery = gr.Gallery(label="🖼️ Result Gallery")
            gr.Markdown(
                """<br>

                ### ✨User Tips:

                1. **Speed vs Quality Trade-off.** Use fewer steps (e.g., 10-step which takes ~4s/image on a single A6000 GPU) for faster generation, but quality may be lower.

                2. **Inpaint Position Freedom.** Inpainting positions are flexible - they don't necessarily need to match the original text locations in the input image.

                3. **Iterative Editing.** Drag outputs from the gallery to the Image Editing Panel (clean the Editing Panel first) for quick refinements.

                4. **Mask Optimization.** Adjust mask size/aspect ratio to match your desired content. The model tends to fill the masks, and harmonizes the generation with background in terms of color and lighting.

                5. **Reference Image Tip.** White-background references improve style consistency - the encoder also considers background context of the given reference image.

                6. **Resolution Balance.** Very high-resolution generation sometimes triggers spelling errors. 512/768px are recommended considering the model is trained under the resolution of 512.
                """
            )
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️Settings")
            reference_image = gr.Image(
                label="🧩 Reference Image (skip this if self-reference)",
                sources=["upload"],
                type="pil",
            )
            prompt = gr.Textbox(
                label="📝 Prompt",
                placeholder="The text is 'Image'...",
                value="The text is 'Image'.",
            )

            with gr.Accordion("🔧 Additional Settings", open=True):
                with gr.Row():
                    height = gr.Number(label="Height", value=512, precision=0)
                    width = gr.Number(label="Width", value=512, precision=0)
                # The initial value is passed exactly once; the original call
                # supplied it both positionally and as ``value=``.
                scale = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="🎚️ Strength")
                steps = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="🔁 Steps")
                with gr.Row():
                    seed = gr.Number(label="🎲 Seed", value=56, precision=0)
                    use_context = gr.Checkbox(value=True, label="🔍 Use Context", interactive=True)
                num_images = gr.Slider(minimum=1, maximum=16, value=2, step=1, label="🖼️ Sample Amount")

            run_btn = gr.Button("🚀 Run", variant="primary")

            mask_output = gr.Image(label="🟩 Mask Demo")
            reference_demo = gr.Image(label="🧩 Reference Demo")

    # Run button event: input order must match process_and_generate's
    # parameter order, and output order must match its returned tuple.
    run_btn.click(
        fn=process_and_generate,
        inputs=[
            editor_component,
            reference_image,
            prompt,
            height,
            width,
            scale,
            steps,
            seed,
            use_context,
            num_images,
        ],
        outputs=[
            mask_output,
            reference_demo,
            gallery,
        ],
    )

if __name__ == "__main__":
    # mcp_server=True is the switch this PR adds to enable MCP for the app.
    demo.launch(mcp_server=True)
|