multimodalart HF Staff committed on
Commit
7a07741
·
verified ·
1 Parent(s): 4583098

feat: Enable MCP

Browse files

Hello! This is an automated PR adding MCP compatibility to your AI App 🤖.

![image.png](https://cdn-uploads.huggingface.co/production/uploads/624bebf604abc7ebb01789af/HQQK38I_MDXLDMYDYBq8H.png)

This PR introduces two improvements:
1. Adds docstrings to the functions in the app file that are directly connected to the Gradio UI, for the downstream LLM to use.
2. Enables the Model Context Protocol (MCP) server by adding `mcp_server=True` to the `.launch()` call.

No other logic has been changed. Please review and merge if it looks good!

Learn more about MCP compatibility in Spaces here: https://huggingface.co/changelog/add-compatible-spaces-to-your-mcp-tools

Files changed (1) hide show
  1. app.py +222 -202
app.py CHANGED
@@ -1,202 +1,222 @@
1
- """
2
- Gradio demo for text customization with Calligrapher (the reference is uploaded by the user).
3
-
4
- """
5
-
6
- import gradio as gr
7
- import numpy as np
8
- from datetime import datetime
9
- import torch
10
- from PIL import Image
11
-
12
- import spaces
13
- from huggingface_hub import snapshot_download
14
- from pipeline_calligrapher import CalligrapherPipeline
15
- from models.calligrapher import Calligrapher
16
- from models.transformer_flux_inpainting import FluxTransformer2DModel
17
- from utils import process_gradio_source, get_bbox_from_mask, crop_image_from_bb, \
18
- resize_img_and_pad, generate_context_reference_image
19
-
20
-
21
- # Function of loading pre-trained models.
22
- def load_models():
23
- snapshot_download(
24
- repo_id="Calligrapher2025/Calligrapher",
25
- allow_patterns="calligrapher.bin",
26
- local_dir="./",
27
- )
28
- print("calligrapher.bin successfully downloaded!")
29
- transformer = FluxTransformer2DModel.from_pretrained("black-forest-labs/FLUX.1-Fill-dev",
30
- subfolder="transformer",
31
- torch_dtype=torch.bfloat16)
32
- pipe = CalligrapherPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev",
33
- transformer=transformer,
34
- torch_dtype=torch.bfloat16).to("cuda")
35
- model = Calligrapher(pipe,
36
- image_encoder_path="google/siglip-so400m-patch14-384",
37
- calligrapher_path="calligrapher.bin",
38
- device="cuda", num_tokens=128)
39
- return model
40
-
41
-
42
- # Init models.
43
- model = load_models()
44
- print('Model loaded!')
45
-
46
-
47
- @spaces.GPU()
48
- def process_and_generate(editor_component, reference_image, prompt, height, width,
49
- scale, steps=50, seed=42, use_context=True, num_images=1):
50
- print('Begin processing!')
51
- # Get source, mask, and cropped images from gr.ImageEditor.
52
- source_image, mask_image, cropped_image = process_gradio_source(editor_component)
53
-
54
- # Resize source and mask.
55
- source_image = source_image.resize((width, height))
56
- mask_image = mask_image.resize((width, height), Image.NEAREST)
57
- mask_np = np.array(mask_image)
58
- mask_np[mask_np > 0] = 255
59
- mask_image = Image.fromarray(mask_np.astype(np.uint8))
60
-
61
- if reference_image is None:
62
- # If self-inpaint (no input ref): (1) get bounding box from the mask and (2) perform cropping to get the ref image.
63
- tl, br = get_bbox_from_mask(mask_image)
64
- # Convert irregularly shaped masks into rectangles.
65
- reference_image = crop_image_from_bb(source_image, tl, br)
66
- # Raw reference image before resizing.
67
- reference_image_to_encoder = resize_img_and_pad(reference_image, target_size=(512, 512))
68
-
69
- if use_context:
70
- reference_context = generate_context_reference_image(reference_image, width)
71
- # Concat the context on the top of the input masked image in the pixel space.
72
- source_with_context = Image.new(source_image.mode, (width, reference_context.size[1] + height))
73
- source_with_context.paste(reference_context, (0, 0))
74
- source_with_context.paste(source_image, (0, reference_context.size[1]))
75
- # Concat the zero mask on the top of the mask image.
76
- mask_with_context = Image.new(mask_image.mode,
77
- (mask_image.size[0],
78
- reference_context.size[1] + mask_image.size[0]),
79
- color=0)
80
- mask_with_context.paste(mask_image, (0, reference_context.size[1]))
81
-
82
- source_image = source_with_context
83
- mask_image = mask_with_context
84
-
85
- all_generated_images = []
86
- for i in range(num_images):
87
- res = model.generate(
88
- image=source_image,
89
- mask_image=mask_image,
90
- ref_image=reference_image_to_encoder,
91
- prompt=prompt,
92
- scale=scale,
93
- num_inference_steps=steps,
94
- width=source_image.size[0],
95
- height=source_image.size[1],
96
- seed=seed + i,
97
- )[0]
98
- if use_context:
99
- res_vis = res.crop((0, reference_context.size[1], res.width, res.height)) # remove context
100
- mask_vis = mask_image.crop(
101
- (0, reference_context.size[1], mask_image.width, mask_image.height)) # remove context mask
102
- else:
103
- res_vis = res
104
- mask_vis = mask_image
105
- all_generated_images.append((res_vis, f"Generating {i + 1} (Seed: {seed + i})"))
106
-
107
- return mask_vis, reference_image_to_encoder, all_generated_images
108
-
109
-
110
- # Main gradio codes.
111
- with gr.Blocks(theme="default", css=".image-editor img {max-width: 70%; height: 70%;}") as demo:
112
- gr.Markdown(
113
- """
114
- # 🖌️ Calligrapher: Freestyle Text Image Customization    [[Code]](https://github.com/Calligrapher2025/Calligrapher) [[Project Page]](https://calligrapher2025.github.io/Calligrapher/)
115
- ### Consider giving a star to the [project](https://github.com/Calligrapher2025/Calligrapher) if you find it useful!
116
- """
117
- )
118
-
119
- with gr.Row():
120
- with gr.Column(scale=3):
121
- gr.Markdown("### 🎨 Image Editing Panel")
122
- editor_component = gr.ImageEditor(
123
- label="Upload or Draw",
124
- type="pil",
125
- brush=gr.Brush(colors=["#FFFFFF"], default_size=30, color_mode="fixed"),
126
- layers=True,
127
- interactive=True,
128
- )
129
-
130
- gr.Markdown("### 📤 Output Result")
131
- gallery = gr.Gallery(label="🖼️ Result Gallery")
132
- gr.Markdown(
133
- """<br>
134
-
135
- ### ✨User Tips:
136
-
137
- 1. **Speed vs Quality Trade-off.** Use fewer steps (e.g., 10-step which takes ~4s/image on a single A6000 GPU) for faster generation, but quality may be lower.
138
-
139
- 2. **Inpaint Position Freedom.** Inpainting positions are flexible - they don't necessarily need to match the original text locations in the input image.
140
-
141
- 3. **Iterative Editing.** Drag outputs from the gallery to the Image Editing Panel (clean the Editing Panel first) for quick refinements.
142
-
143
- 4. **Mask Optimization.** Adjust mask size/aspect ratio to match your desired content. The model tends to fill the masks, and harmonizes the generation with background in terms of color and lighting.
144
-
145
- 5. **Reference Image Tip.** White-background references improve style consistency - the encoder also considers background context of the given reference image.
146
-
147
- 6. **Resolution Balance.** Very high-resolution generation sometimes triggers spelling errors. 512/768px are recommended considering the model is trained under the resolution of 512.
148
- """
149
- )
150
- with gr.Column(scale=1):
151
- gr.Markdown("### ⚙️Settings")
152
- reference_image = gr.Image(
153
- label="🧩 Reference Image (skip this if self-reference)",
154
- sources=["upload"],
155
- type="pil",
156
- )
157
- prompt = gr.Textbox(
158
- label="📝 Prompt",
159
- placeholder="The text is 'Image'...",
160
- value="The text is 'Image'."
161
- )
162
-
163
- with gr.Accordion("🔧 Additional Settings", open=True):
164
- with gr.Row():
165
- height = gr.Number(label="Height", value=512, precision=0)
166
- width = gr.Number(label="Width", value=512, precision=0)
167
- scale = gr.Slider(0.0, 2.0, 1.0, step=0.1, value=1.0, label="🎚️ Strength")
168
- steps = gr.Slider(1, 100, 50, step=1, label="🔁 Steps")
169
- with gr.Row():
170
- seed = gr.Number(label="🎲 Seed", value=56, precision=0)
171
- use_context = gr.Checkbox(value=True, label="🔍 Use Context", interactive=True)
172
- num_images = gr.Slider(1, 16, 2, step=1, label="🖼️ Sample Amount")
173
-
174
- run_btn = gr.Button("🚀 Run", variant="primary")
175
-
176
- mask_output = gr.Image(label="🟩 Mask Demo")
177
- reference_demo = gr.Image(label="🧩 Reference Demo")
178
-
179
- # Run button event.
180
- run_btn.click(
181
- fn=process_and_generate,
182
- inputs=[
183
- editor_component,
184
- reference_image,
185
- prompt,
186
- height,
187
- width,
188
- scale,
189
- steps,
190
- seed,
191
- use_context,
192
- num_images
193
- ],
194
- outputs=[
195
- mask_output,
196
- reference_demo,
197
- gallery
198
- ]
199
- )
200
-
201
- if __name__ == "__main__":
202
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from datetime import datetime
4
+ import torch
5
+ from PIL import Image
6
+
7
+ import spaces
8
+ from huggingface_hub import snapshot_download
9
+ from pipeline_calligrapher import CalligrapherPipeline
10
+ from models.calligrapher import Calligrapher
11
+ from models.transformer_flux_inpainting import FluxTransformer2DModel
12
+ from utils import process_gradio_source, get_bbox_from_mask, crop_image_from_bb, \
13
+ resize_img_and_pad, generate_context_reference_image
14
+
15
+
16
+ # Function of loading pre-trained models.
17
+ def load_models():
18
+ snapshot_download(
19
+ repo_id="Calligrapher2025/Calligrapher",
20
+ allow_patterns="calligrapher.bin",
21
+ local_dir="./",
22
+ )
23
+ print("calligrapher.bin successfully downloaded!")
24
+ transformer = FluxTransformer2DModel.from_pretrained("black-forest-labs/FLUX.1-Fill-dev",
25
+ subfolder="transformer",
26
+ torch_dtype=torch.bfloat16)
27
+ pipe = CalligrapherPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev",
28
+ transformer=transformer,
29
+ torch_dtype=torch.bfloat16).to("cuda")
30
+ model = Calligrapher(pipe,
31
+ image_encoder_path="google/siglip-so400m-patch14-384",
32
+ calligrapher_path="calligrapher.bin",
33
+ device="cuda", num_tokens=128)
34
+ return model
35
+
36
+
37
+ # Init models.
38
+ model = load_models()
39
+ print('Model loaded!')
40
+
41
+
42
+ @spaces.GPU()
43
+ def process_and_generate(editor_component, reference_image, prompt, height, width,
44
+ scale, steps=50, seed=42, use_context=True, num_images=1):
45
+ """
46
+ Process input images and generate customized text images using the Calligrapher model.
47
+
48
+ This function handles the complete pipeline from processing user inputs through the image editor,
49
+ preparing reference images, applying masks, and generating multiple customized text images
50
+ based on the provided parameters.
51
+
52
+ Args:
53
+ editor_component: Gradio ImageEditor component containing the source image and mask drawings.
54
+ reference_image: PIL Image object of the reference style image, or None for self-reference.
55
+ prompt: String describing the desired text content.
56
+ height: Integer height of the output image in pixels.
57
+ width: Integer width of the output image in pixels.
58
+ scale: Float value controlling the generation strength (0.0 to 2.0).
59
+ steps: Integer number of inference steps for the generation process (default: 50).
60
+ seed: Integer random seed for reproducible generation (default: 42).
61
+ use_context: Boolean flag to include context reference in generation (default: True).
62
+ num_images: Integer number of images to generate (default: 1).
63
+
64
+ Returns:
65
+ Tuple containing:
66
+ - mask_vis: PIL Image of the processed mask (with context removed if applicable).
67
+ - reference_image_to_encoder: PIL Image of the resized reference image used by the encoder.
68
+ - all_generated_images: List of tuples, each containing (generated_image, caption_string).
69
+ """
70
+ print('Begin processing!')
71
+ # Get source, mask, and cropped images from gr.ImageEditor.
72
+ source_image, mask_image, cropped_image = process_gradio_source(editor_component)
73
+
74
+ # Resize source and mask.
75
+ source_image = source_image.resize((width, height))
76
+ mask_image = mask_image.resize((width, height), Image.NEAREST)
77
+ mask_np = np.array(mask_image)
78
+ mask_np[mask_np > 0] = 255
79
+ mask_image = Image.fromarray(mask_np.astype(np.uint8))
80
+
81
+ if reference_image is None:
82
+ # If self-inpaint (no input ref): (1) get bounding box from the mask and (2) perform cropping to get the ref image.
83
+ tl, br = get_bbox_from_mask(mask_image)
84
+ # Convert irregularly shaped masks into rectangles.
85
+ reference_image = crop_image_from_bb(source_image, tl, br)
86
+ # Raw reference image before resizing.
87
+ reference_image_to_encoder = resize_img_and_pad(reference_image, target_size=(512, 512))
88
+
89
+ if use_context:
90
+ reference_context = generate_context_reference_image(reference_image, width)
91
+ # Concat the context on the top of the input masked image in the pixel space.
92
+ source_with_context = Image.new(source_image.mode, (width, reference_context.size[1] + height))
93
+ source_with_context.paste(reference_context, (0, 0))
94
+ source_with_context.paste(source_image, (0, reference_context.size[1]))
95
+ # Concat the zero mask on the top of the mask image.
96
+ mask_with_context = Image.new(mask_image.mode,
97
+ (mask_image.size[0],
98
+ reference_context.size[1] + mask_image.size[0]),
99
+ color=0)
100
+ mask_with_context.paste(mask_image, (0, reference_context.size[1]))
101
+
102
+ source_image = source_with_context
103
+ mask_image = mask_with_context
104
+
105
+ all_generated_images = []
106
+ for i in range(num_images):
107
+ res = model.generate(
108
+ image=source_image,
109
+ mask_image=mask_image,
110
+ ref_image=reference_image_to_encoder,
111
+ prompt=prompt,
112
+ scale=scale,
113
+ num_inference_steps=steps,
114
+ width=source_image.size[0],
115
+ height=source_image.size[1],
116
+ seed=seed + i,
117
+ )[0]
118
+ if use_context:
119
+ res_vis = res.crop((0, reference_context.size[1], res.width, res.height)) # remove context
120
+ mask_vis = mask_image.crop(
121
+ (0, reference_context.size[1], mask_image.width, mask_image.height)) # remove context mask
122
+ else:
123
+ res_vis = res
124
+ mask_vis = mask_image
125
+ all_generated_images.append((res_vis, f"Generating {i + 1} (Seed: {seed + i})"))
126
+
127
+ return mask_vis, reference_image_to_encoder, all_generated_images
128
+
129
+
130
+ # Main gradio codes.
131
+ with gr.Blocks(theme="default", css=".image-editor img {max-width: 70%; height: 70%;}") as demo:
132
+ gr.Markdown(
133
+ """
134
+ # 🖌️ Calligrapher: Freestyle Text Image Customization &emsp;&emsp; [[Code]](https://github.com/Calligrapher2025/Calligrapher) [[Project Page]](https://calligrapher2025.github.io/Calligrapher/)
135
+ ### Consider giving a star to the [project](https://github.com/Calligrapher2025/Calligrapher) if you find it useful!
136
+ """
137
+ )
138
+
139
+ with gr.Row():
140
+ with gr.Column(scale=3):
141
+ gr.Markdown("### 🎨 Image Editing Panel")
142
+ editor_component = gr.ImageEditor(
143
+ label="Upload or Draw",
144
+ type="pil",
145
+ brush=gr.Brush(colors=["#FFFFFF"], default_size=30, color_mode="fixed"),
146
+ layers=True,
147
+ interactive=True,
148
+ )
149
+
150
+ gr.Markdown("### 📤 Output Result")
151
+ gallery = gr.Gallery(label="🖼️ Result Gallery")
152
+ gr.Markdown(
153
+ """<br>
154
+
155
+ ### ✨User Tips:
156
+
157
+ 1. **Speed vs Quality Trade-off.** Use fewer steps (e.g., 10-step which takes ~4s/image on a single A6000 GPU) for faster generation, but quality may be lower.
158
+
159
+ 2. **Inpaint Position Freedom.** Inpainting positions are flexible - they don't necessarily need to match the original text locations in the input image.
160
+
161
+ 3. **Iterative Editing.** Drag outputs from the gallery to the Image Editing Panel (clean the Editing Panel first) for quick refinements.
162
+
163
+ 4. **Mask Optimization.** Adjust mask size/aspect ratio to match your desired content. The model tends to fill the masks, and harmonizes the generation with background in terms of color and lighting.
164
+
165
+ 5. **Reference Image Tip.** White-background references improve style consistency - the encoder also considers background context of the given reference image.
166
+
167
+ 6. **Resolution Balance.** Very high-resolution generation sometimes triggers spelling errors. 512/768px are recommended considering the model is trained under the resolution of 512.
168
+ """
169
+ )
170
+ with gr.Column(scale=1):
171
+ gr.Markdown("### ⚙️Settings")
172
+ reference_image = gr.Image(
173
+ label="🧩 Reference Image (skip this if self-reference)",
174
+ sources=["upload"],
175
+ type="pil",
176
+ )
177
+ prompt = gr.Textbox(
178
+ label="📝 Prompt",
179
+ placeholder="The text is 'Image'...",
180
+ value="The text is 'Image'."
181
+ )
182
+
183
+ with gr.Accordion("🔧 Additional Settings", open=True):
184
+ with gr.Row():
185
+ height = gr.Number(label="Height", value=512, precision=0)
186
+ width = gr.Number(label="Width", value=512, precision=0)
187
+ scale = gr.Slider(0.0, 2.0, 1.0, step=0.1, value=1.0, label="🎚️ Strength")
188
+ steps = gr.Slider(1, 100, 50, step=1, label="🔁 Steps")
189
+ with gr.Row():
190
+ seed = gr.Number(label="🎲 Seed", value=56, precision=0)
191
+ use_context = gr.Checkbox(value=True, label="🔍 Use Context", interactive=True)
192
+ num_images = gr.Slider(1, 16, 2, step=1, label="🖼️ Sample Amount")
193
+
194
+ run_btn = gr.Button("🚀 Run", variant="primary")
195
+
196
+ mask_output = gr.Image(label="🟩 Mask Demo")
197
+ reference_demo = gr.Image(label="🧩 Reference Demo")
198
+
199
+ # Run button event.
200
+ run_btn.click(
201
+ fn=process_and_generate,
202
+ inputs=[
203
+ editor_component,
204
+ reference_image,
205
+ prompt,
206
+ height,
207
+ width,
208
+ scale,
209
+ steps,
210
+ seed,
211
+ use_context,
212
+ num_images
213
+ ],
214
+ outputs=[
215
+ mask_output,
216
+ reference_demo,
217
+ gallery
218
+ ]
219
+ )
220
+
221
+ if __name__ == "__main__":
222
+ demo.launch(mcp_server=True)