Spaces:

Calligrapher2025
/

Calligrapher

Running on Zero

App Files Files Community

multimodalart HF Staff committed on Jul 21

Commit

7a07741

verified ·

1 Parent(s): 4583098

feat: Enable MCP

Browse files

Hello! This is an automated PR adding MCP compatibility to your AI App 🤖.

![image.png](https://cdn-uploads.huggingface.co/production/uploads/624bebf604abc7ebb01789af/HQQK38I_MDXLDMYDYBq8H.png)

This PR introduces two improvements:
1. Adds docstrings to the functions in the app file that are directly connected to the Gradio UI, for the downstream LLM to use.
2. Enables the Model Context Protocol (MCP) server by adding `mcp_server=True` to the `.launch()` call.

No other logic has been changed. Please review and merge if it looks good! Learn more about MCP compatibility in Spaces here: https://huggingface.co/changelog/add-compatible-spaces-to-your-mcp-tools

Files changed (1) hide show

app.py +222 -202

app.py CHANGED Viewed

@@ -1,202 +1,222 @@
-"""
-    Gradio demo for text customization with Calligrapher (the reference is uploaded by the user).
-"""
-import gradio as gr
-import numpy as np
-from datetime import datetime
-import torch
-from PIL import Image
-import spaces
-from huggingface_hub import snapshot_download
-from pipeline_calligrapher import CalligrapherPipeline
-from models.calligrapher import Calligrapher
-from models.transformer_flux_inpainting import FluxTransformer2DModel
-from utils import process_gradio_source, get_bbox_from_mask, crop_image_from_bb, \
-                  resize_img_and_pad, generate_context_reference_image
-# Function of loading pre-trained models.
-def load_models():
-    snapshot_download(
-                repo_id="Calligrapher2025/Calligrapher",
-                allow_patterns="calligrapher.bin",
-                local_dir="./",
-                )
-    print("calligrapher.bin successfully downloaded!")
-    transformer = FluxTransformer2DModel.from_pretrained("black-forest-labs/FLUX.1-Fill-dev",
-                                                         subfolder="transformer",
-                                                         torch_dtype=torch.bfloat16)
-    pipe = CalligrapherPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev",
-                                                transformer=transformer,
-                                                torch_dtype=torch.bfloat16).to("cuda")
-    model = Calligrapher(pipe,
-                         image_encoder_path="google/siglip-so400m-patch14-384",
-                         calligrapher_path="calligrapher.bin",
-                         device="cuda", num_tokens=128)
-    return model
-# Init models.
-model = load_models()
-print('Model loaded!')
-@spaces.GPU()
-def process_and_generate(editor_component, reference_image, prompt, height, width,
-                         scale, steps=50, seed=42, use_context=True, num_images=1):
-    print('Begin processing!')
-    # Get source, mask, and cropped images from gr.ImageEditor.
-    source_image, mask_image, cropped_image = process_gradio_source(editor_component)
-    # Resize source and mask.
-    source_image = source_image.resize((width, height))
-    mask_image = mask_image.resize((width, height), Image.NEAREST)
-    mask_np = np.array(mask_image)
-    mask_np[mask_np > 0] = 255
-    mask_image = Image.fromarray(mask_np.astype(np.uint8))
-    if reference_image is None:
-        # If self-inpaint (no input ref): (1) get bounding box from the mask and (2) perform cropping to get the ref image.
-        tl, br = get_bbox_from_mask(mask_image)
-        # Convert irregularly shaped masks into rectangles.
-        reference_image = crop_image_from_bb(source_image, tl, br)
-    # Raw reference image before resizing.
-    reference_image_to_encoder = resize_img_and_pad(reference_image, target_size=(512, 512))
-    if use_context:
-        reference_context = generate_context_reference_image(reference_image, width)
-        # Concat the context on the top of the input masked image in the pixel space.
-        source_with_context = Image.new(source_image.mode, (width, reference_context.size[1] + height))
-        source_with_context.paste(reference_context, (0, 0))
-        source_with_context.paste(source_image, (0, reference_context.size[1]))
-        # Concat the zero mask on the top of the mask image.
-        mask_with_context = Image.new(mask_image.mode,
-                                      (mask_image.size[0],
-                                       reference_context.size[1] + mask_image.size[0]),
-                                       color=0)
-        mask_with_context.paste(mask_image, (0, reference_context.size[1]))
-        source_image = source_with_context
-        mask_image = mask_with_context
-    all_generated_images = []
-    for i in range(num_images):
-        res = model.generate(
-            image=source_image,
-            mask_image=mask_image,
-            ref_image=reference_image_to_encoder,
-            prompt=prompt,
-            scale=scale,
-            num_inference_steps=steps,
-            width=source_image.size[0],
-            height=source_image.size[1],
-            seed=seed + i,
-        )[0]
-        if use_context:
-            res_vis = res.crop((0, reference_context.size[1], res.width, res.height))  # remove context
-            mask_vis = mask_image.crop(
-                (0, reference_context.size[1], mask_image.width, mask_image.height))  # remove context mask
-        else:
-            res_vis = res
-            mask_vis = mask_image
-        all_generated_images.append((res_vis, f"Generating {i + 1} (Seed: {seed + i})"))
-    return mask_vis, reference_image_to_encoder, all_generated_images
-# Main gradio codes.
-with gr.Blocks(theme="default", css=".image-editor img {max-width: 70%; height: 70%;}") as demo:
-    gr.Markdown(
-        """
-        # 🖌️ Calligrapher: Freestyle Text Image Customization &emsp;&emsp; [[Code]](https://github.com/Calligrapher2025/Calligrapher) [[Project Page]](https://calligrapher2025.github.io/Calligrapher/)
-        ### Consider giving a star to the [project](https://github.com/Calligrapher2025/Calligrapher) if you find it useful!
-        """
-    )
-    with gr.Row():
-        with gr.Column(scale=3):
-            gr.Markdown("### 🎨 Image Editing Panel")
-            editor_component = gr.ImageEditor(
-                label="Upload or Draw",
-                type="pil",
-                brush=gr.Brush(colors=["#FFFFFF"], default_size=30, color_mode="fixed"),
-                layers=True,
-                interactive=True,
-            )
-            gr.Markdown("### 📤 Output Result")
-            gallery = gr.Gallery(label="🖼️ Result Gallery")
-            gr.Markdown(
-                """<br>
-                 ### ✨User Tips:
-                 1. **Speed vs Quality Trade-off.** Use fewer steps (e.g., 10-step which takes ~4s/image on a single A6000 GPU) for faster generation, but quality may be lower.
-                 2. **Inpaint Position Freedom.**  Inpainting positions are flexible - they don't necessarily need to match the original text locations in the input image.
-                 3. **Iterative Editing.** Drag outputs from the gallery to the Image Editing Panel (clean the Editing Panel first) for quick refinements.
-                 4. **Mask Optimization.** Adjust mask size/aspect ratio to match your desired content. The model tends to fill the masks, and harmonizes the generation with background in terms of color and lighting.
-                 5. **Reference Image Tip.**  White-background references improve style consistency - the encoder also considers background context of the given reference image.
-                 6. **Resolution Balance.** Very high-resolution generation sometimes triggers spelling errors. 512/768px are recommended considering the model is trained under the resolution of 512.
-                """
-            )
-        with gr.Column(scale=1):
-            gr.Markdown("### ⚙️Settings")
-            reference_image = gr.Image(
-                label="🧩 Reference Image  (skip this if self-reference)",
-                sources=["upload"],
-                type="pil",
-            )
-            prompt = gr.Textbox(
-                label="📝 Prompt",
-                placeholder="The text is 'Image'...",
-                value="The text is 'Image'."
-            )
-            with gr.Accordion("🔧 Additional Settings", open=True):
-                with gr.Row():
-                    height = gr.Number(label="Height", value=512, precision=0)
-                    width = gr.Number(label="Width", value=512, precision=0)
-                scale = gr.Slider(0.0, 2.0, 1.0, step=0.1, value=1.0, label="🎚️ Strength")
-                steps = gr.Slider(1, 100, 50, step=1, label="🔁 Steps")
-                with gr.Row():
-                    seed = gr.Number(label="🎲 Seed", value=56, precision=0)
-                    use_context = gr.Checkbox(value=True, label="🔍 Use Context", interactive=True)
-                num_images = gr.Slider(1, 16, 2, step=1, label="🖼️ Sample Amount")
-            run_btn = gr.Button("🚀 Run", variant="primary")
-            mask_output = gr.Image(label="🟩 Mask Demo")
-            reference_demo = gr.Image(label="🧩 Reference Demo")
-    # Run button event.
-    run_btn.click(
-        fn=process_and_generate,
-        inputs=[
-            editor_component,
-            reference_image,
-            prompt,
-            height,
-            width,
-            scale,
-            steps,
-            seed,
-            use_context,
-            num_images
-        ],
-        outputs=[
-            mask_output,
-            reference_demo,
-            gallery
-        ]
-    )
-if __name__ == "__main__":
-    demo.launch()

+import gradio as gr
+import numpy as np
+from datetime import datetime
+import torch
+from PIL import Image
+import spaces
+from huggingface_hub import snapshot_download
+from pipeline_calligrapher import CalligrapherPipeline
+from models.calligrapher import Calligrapher
+from models.transformer_flux_inpainting import FluxTransformer2DModel
+from utils import process_gradio_source, get_bbox_from_mask, crop_image_from_bb, \
+                  resize_img_and_pad, generate_context_reference_image
+# Function of loading pre-trained models.
+def load_models():
+    snapshot_download(
+                repo_id="Calligrapher2025/Calligrapher",
+                allow_patterns="calligrapher.bin",
+                local_dir="./",
+                )
+    print("calligrapher.bin successfully downloaded!")
+    transformer = FluxTransformer2DModel.from_pretrained("black-forest-labs/FLUX.1-Fill-dev",
+                                                         subfolder="transformer",
+                                                         torch_dtype=torch.bfloat16)
+    pipe = CalligrapherPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev",
+                                                transformer=transformer,
+                                                torch_dtype=torch.bfloat16).to("cuda")
+    model = Calligrapher(pipe,
+                         image_encoder_path="google/siglip-so400m-patch14-384",
+                         calligrapher_path="calligrapher.bin",
+                         device="cuda", num_tokens=128)
+    return model
+# Init models.
+model = load_models()
+print('Model loaded!')
+@spaces.GPU()
+def process_and_generate(editor_component, reference_image, prompt, height, width,
+                         scale, steps=50, seed=42, use_context=True, num_images=1):
+    """
+    Process input images and generate customized text images using the Calligrapher model.
+    This function handles the complete pipeline from processing user inputs through the image editor,
+    preparing reference images, applying masks, and generating multiple customized text images
+    based on the provided parameters.
+    Args:
+        editor_component: Gradio ImageEditor component containing the source image and mask drawings.
+        reference_image: PIL Image object of the reference style image, or None for self-reference.
+        prompt: String describing the desired text content.
+        height: Integer height of the output image in pixels.
+        width: Integer width of the output image in pixels.
+        scale: Float value controlling the generation strength (0.0 to 2.0).
+        steps: Integer number of inference steps for the generation process (default: 50).
+        seed: Integer random seed for reproducible generation (default: 42).
+        use_context: Boolean flag to include context reference in generation (default: True).
+        num_images: Integer number of images to generate (default: 1).
+    Returns:
+        Tuple containing:
+            - mask_vis: PIL Image of the processed mask (with context removed if applicable).
+            - reference_image_to_encoder: PIL Image of the resized reference image used by the encoder.
+            - all_generated_images: List of tuples, each containing (generated_image, caption_string).
+    """
+    print('Begin processing!')
+    # Get source, mask, and cropped images from gr.ImageEditor.
+    source_image, mask_image, cropped_image = process_gradio_source(editor_component)
+    # Resize source and mask.
+    source_image = source_image.resize((width, height))
+    mask_image = mask_image.resize((width, height), Image.NEAREST)
+    mask_np = np.array(mask_image)
+    mask_np[mask_np > 0] = 255
+    mask_image = Image.fromarray(mask_np.astype(np.uint8))
+    if reference_image is None:
+        # If self-inpaint (no input ref): (1) get bounding box from the mask and (2) perform cropping to get the ref image.
+        tl, br = get_bbox_from_mask(mask_image)
+        # Convert irregularly shaped masks into rectangles.
+        reference_image = crop_image_from_bb(source_image, tl, br)
+    # Raw reference image before resizing.
+    reference_image_to_encoder = resize_img_and_pad(reference_image, target_size=(512, 512))
+    if use_context:
+        reference_context = generate_context_reference_image(reference_image, width)
+        # Concat the context on the top of the input masked image in the pixel space.
+        source_with_context = Image.new(source_image.mode, (width, reference_context.size[1] + height))
+        source_with_context.paste(reference_context, (0, 0))
+        source_with_context.paste(source_image, (0, reference_context.size[1]))
+        # Concat the zero mask on the top of the mask image.
+        mask_with_context = Image.new(mask_image.mode,
+                                      (mask_image.size[0],
+                                       reference_context.size[1] + mask_image.size[0]),
+                                       color=0)
+        mask_with_context.paste(mask_image, (0, reference_context.size[1]))
+        source_image = source_with_context
+        mask_image = mask_with_context
+    all_generated_images = []
+    for i in range(num_images):
+        res = model.generate(
+            image=source_image,
+            mask_image=mask_image,
+            ref_image=reference_image_to_encoder,
+            prompt=prompt,
+            scale=scale,
+            num_inference_steps=steps,
+            width=source_image.size[0],
+            height=source_image.size[1],
+            seed=seed + i,
+        )[0]
+        if use_context:
+            res_vis = res.crop((0, reference_context.size[1], res.width, res.height))  # remove context
+            mask_vis = mask_image.crop(
+                (0, reference_context.size[1], mask_image.width, mask_image.height))  # remove context mask
+        else:
+            res_vis = res
+            mask_vis = mask_image
+        all_generated_images.append((res_vis, f"Generating {i + 1} (Seed: {seed + i})"))
+    return mask_vis, reference_image_to_encoder, all_generated_images
+# Main gradio codes.
+with gr.Blocks(theme="default", css=".image-editor img {max-width: 70%; height: 70%;}") as demo:
+    gr.Markdown(
+        """
+        # 🖌️ Calligrapher: Freestyle Text Image Customization &emsp;&emsp; [[Code]](https://github.com/Calligrapher2025/Calligrapher) [[Project Page]](https://calligrapher2025.github.io/Calligrapher/)
+        ### Consider giving a star to the [project](https://github.com/Calligrapher2025/Calligrapher) if you find it useful!
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=3):
+            gr.Markdown("### 🎨 Image Editing Panel")
+            editor_component = gr.ImageEditor(
+                label="Upload or Draw",
+                type="pil",
+                brush=gr.Brush(colors=["#FFFFFF"], default_size=30, color_mode="fixed"),
+                layers=True,
+                interactive=True,
+            )
+            gr.Markdown("### 📤 Output Result")
+            gallery = gr.Gallery(label="🖼️ Result Gallery")
+            gr.Markdown(
+                """<br>
+                 ### ✨User Tips:
+                 1. **Speed vs Quality Trade-off.** Use fewer steps (e.g., 10-step which takes ~4s/image on a single A6000 GPU) for faster generation, but quality may be lower.
+                 2. **Inpaint Position Freedom.**  Inpainting positions are flexible - they don't necessarily need to match the original text locations in the input image.
+                 3. **Iterative Editing.** Drag outputs from the gallery to the Image Editing Panel (clean the Editing Panel first) for quick refinements.
+                 4. **Mask Optimization.** Adjust mask size/aspect ratio to match your desired content. The model tends to fill the masks, and harmonizes the generation with background in terms of color and lighting.
+                 5. **Reference Image Tip.**  White-background references improve style consistency - the encoder also considers background context of the given reference image.
+                 6. **Resolution Balance.** Very high-resolution generation sometimes triggers spelling errors. 512/768px are recommended considering the model is trained under the resolution of 512.
+                """
+            )
+        with gr.Column(scale=1):
+            gr.Markdown("### ⚙️Settings")
+            reference_image = gr.Image(
+                label="🧩 Reference Image  (skip this if self-reference)",
+                sources=["upload"],
+                type="pil",
+            )
+            prompt = gr.Textbox(
+                label="📝 Prompt",
+                placeholder="The text is 'Image'...",
+                value="The text is 'Image'."
+            )
+            with gr.Accordion("🔧 Additional Settings", open=True):
+                with gr.Row():
+                    height = gr.Number(label="Height", value=512, precision=0)
+                    width = gr.Number(label="Width", value=512, precision=0)
+                scale = gr.Slider(0.0, 2.0, 1.0, step=0.1, value=1.0, label="🎚️ Strength")
+                steps = gr.Slider(1, 100, 50, step=1, label="🔁 Steps")
+                with gr.Row():
+                    seed = gr.Number(label="🎲 Seed", value=56, precision=0)
+                    use_context = gr.Checkbox(value=True, label="🔍 Use Context", interactive=True)
+                num_images = gr.Slider(1, 16, 2, step=1, label="🖼️ Sample Amount")
+            run_btn = gr.Button("🚀 Run", variant="primary")
+            mask_output = gr.Image(label="🟩 Mask Demo")
+            reference_demo = gr.Image(label="🧩 Reference Demo")
+    # Run button event.
+    run_btn.click(
+        fn=process_and_generate,
+        inputs=[
+            editor_component,
+            reference_image,
+            prompt,
+            height,
+            width,
+            scale,
+            steps,
+            seed,
+            use_context,
+            num_images
+        ],
+        outputs=[
+            mask_output,
+            reference_demo,
+            gallery
+        ]
+    )
+if __name__ == "__main__":
+    demo.launch(mcp_server=True)