Spaces: Runtime error
Commit 50a83a4 · Parent: ca1f90f
Update
Files changed:
- app.py (+110, -33)
- fonts/arial.ttf (binary, added)
- templates/template1.png (binary, added)
- templates/template2.png (binary, added)

app.py
CHANGED
@@ -1,12 +1,11 @@
+import asyncio
 import io
 from io import BytesIO
-
 import gradio as gr
 import requests
 import torch
 from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
-from
-from PIL import Image, ImageOps
+from PIL import Image, ImageOps, ImageDraw, ImageFont
 import PIL
 import replicate
 import os
@@ -18,11 +17,18 @@ device = torch.device(device_name)
 processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
 model_clip = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined").to(device)
 
-
-
-model = replicate.models.get(model_name)
-version = model.versions.get("f9bb0632bfdceb83196e85521b9b55895f8ff3d1d3b487fd1973210c0eb30bec")
+os.environ['REPLICATE_API_TOKEN'] = 'cbd8d6421f3037d482bd7d6ec8e7368350e3aaab'
+model = replicate.models.get("stability-ai/stable-diffusion-inpainting")
+version = model.versions.get("c28b92a7ecd66eee4aefcd8a94eb9e7f6c3805d5f06038165407fb5cb355ba67")
+
+sf_prompt_1 = "old bridge, mountain, grass"
+sf_neg_prompt_1 = ""
+
+sf_prompt_2 = "sunflowers, old bridge, mountain, grass"
+sf_neg_prompt_2 = "animal"
 
+template1 = Image.open("templates/template1.png").resize((512, 512))
+template2 = Image.open("templates/template2.png").resize((512, 512))
 
 
 def numpy_to_pil(images):
@@ -31,7 +37,6 @@ def numpy_to_pil(images):
     images = (images * 255).round().astype("uint8")
 
     if images.shape[-1] == 1:
-        # special case for grayscale (single channel) images
         pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images]
     else:
         pil_images = [Image.fromarray(image) for image in images]
@@ -48,50 +53,48 @@ def get_mask(text, image):
     mask = torch.sigmoid(outputs.logits).cpu().detach().unsqueeze(-1).numpy()
 
     mask_pil = numpy_to_pil(mask)[0].resize(image.size)
-    #mask_pil.show()
     return mask_pil
 
 
 def image_to_byte_array(image: Image) -> bytes:
-    # BytesIO is a file-like buffer stored in memory
     imgByteArr = io.BytesIO()
-    # image.save expects a file-like as a argument
     image.save(imgByteArr, format='PNG')
-    # Turn the BytesIO object back into a bytes object
     #imgByteArr = imgByteArr.getvalue()
     return imgByteArr
 
 
-def
-
+def add_template(image, template):
+    image.paste(template, (0, 0), mask=template)
+    return image
+
+
+async def predict(prompt, negative_prompt, image, mask_img):
     image = image.convert("RGB").resize((512, 512))
-    mask_image =
+    mask_image = mask_img.convert("RGB").resize((512, 512))
     mask_image = ImageOps.invert(mask_image)
-
-    # io.BufferedReader(image_to_byte_array(image))
+
     inputs = {
         # Input prompt
         'prompt': prompt,
 
+        # Specify things to not see in the output
+        'negative_prompt': negative_prompt,
+
         # Inital image to generate variations of. Supproting images size with
         # 512x512
         'image': image_to_byte_array(image),
 
         # Black and white image to use as mask for inpainting over the image
-        # provided.
+        # provided. White pixels are inpainted and black pixels are preserved
        'mask': image_to_byte_array(mask_image),
 
-        # Prompt strength when using init image. 1.0 corresponds to full
-        # destruction of information in init image
-        'prompt_strength': 0.8,
-
        # Number of images to output. Higher number of outputs may OOM.
        # Range: 1 to 8
        'num_outputs': 1,
 
        # Number of denoising steps
        # Range: 1 to 500
-        'num_inference_steps':
+        'num_inference_steps': 25,
 
        # Scale for classifier-free guidance
        # Range: 1 to 20
@@ -112,31 +115,105 @@ def predict(prompt, negative_prompt, image, obj2mask):
     return (img_final)
 
 
-def
+async def predicts(sf_prompt_1, sf_neg_prompt_1, sf_prompt_2, sf_neg_prompt_2, image, image_numpy, mask_img):
+    testing_local = True
+    if testing_local:
+        img1 = Image.fromarray(image_numpy).convert("RGB").resize((512, 512))
+        img2 = Image.fromarray(image_numpy).convert("RGB").resize((512, 512))
+        return img1, img2
+
+    task1 = asyncio.create_task(predict(sf_prompt_1, sf_neg_prompt_1, image, mask_img))
+    await asyncio.sleep(5)
+    task2 = asyncio.create_task(predict(sf_prompt_2, sf_neg_prompt_2, image, mask_img))
+
+    await task1
+    await task2
+
+    img1 = task1.result()
+    img2 = task2.result()
+
+    return img1, img2
+
+
+def draw_text(img, template_coords, text1, text2):
+    font1 = ImageFont.truetype(font="fonts/arial.ttf", size=18)
+    font2 = ImageFont.truetype(font="fonts/arial.ttf", size=14)
+
+    x1 = template_coords['x1']
+    y1 = template_coords['y1']
+    x2 = template_coords['x2']
+    y2 = template_coords['y2']
+
+    draw = ImageDraw.Draw(img)
+    draw.text((x1, y1), text1, fill=(255, 0, 0), font=font1)
+    draw.text((x2, y2), text2, fill=(255, 0, 0), font=font2)
+
+
+def inference(obj2mask, image_numpy, txt_1, txt_2):
     generator = torch.Generator()
     generator.manual_seed(int(52362))
 
     image = numpy_to_pil(image_numpy)[0].convert("RGB").resize((512, 512))
-
-
+
+    mask_img = get_mask(obj2mask, image)
+
+    img1, img2 = asyncio.run(predicts(sf_prompt_1, sf_neg_prompt_1, sf_prompt_2, sf_neg_prompt_2, image, image_numpy, mask_img))
+
+    img1_1 = add_template(img1.copy(), template1.copy())
+    img1_2 = add_template(img1.copy(), template2.copy())
+
+    img2_1 = add_template(img2.copy(), template1.copy())
+    img2_2 = add_template(img2.copy(), template2.copy())
+
+    template1_coords = {
+        'x1': 5,
+        'y1': 300,
+        'x2': 15,
+        'y2': 400
+    }
+
+    template2_coords = {
+        'x1': 5,
+        'y1': 300,
+        'x2': 15,
+        'y2': 400
+    }
+
+    draw_text(img1_1, template1_coords, txt_1, txt_2)
+    draw_text(img1_2, template2_coords, txt_1, txt_2)
+    draw_text(img2_1, template1_coords, txt_1, txt_2)
+    draw_text(img2_2, template2_coords, txt_1, txt_2)
+
+    return [img1_1, img1_2, img2_1, img2_2]
 
 
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-
-
-
+            txt_1 = gr.Textbox(label="Texto principal da propaganda", value="Promoção Imperdível")
+            txt_2 = gr.Textbox(label="Texto secundário da propaganda", value="Até 50% para alguns produtos")
+
+            mask = gr.Textbox(label="Descrição da imagem", value="shoe")
             intput_img = gr.Image()
             run = gr.Button(value="Generate")
+        with gr.Row():
+            with gr.Column():
+                output_img1_1 = gr.Image()
+
+            with gr.Column():
+                output_img1_2 = gr.Image()
+
+        with gr.Row():
+            with gr.Column():
+                output_img2_1 = gr.Image()
+
         with gr.Column():
-
+            output_img2_2 = gr.Image()
 
     run.click(
         inference,
-        inputs=[
-        ],
-        outputs=output_img,
+        inputs=[mask, intput_img, txt_1, txt_2],
+        outputs=[output_img1_1, output_img1_2, output_img2_1, output_img2_2],
     )
 
 demo.queue(concurrency_count=1)
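Note on the concurrency pattern above: the new predicts helper schedules the two inpainting calls with asyncio.create_task, staggers them with a five-second sleep, and awaits both before reading the results. A minimal, self-contained sketch of that fan-out shape is below; the coroutine name fake_predict and the delays are illustrative stand-ins, not part of the commit, and tasks only overlap while each coroutine is actually awaiting something (a blocking call inside a coroutine still runs serially).

    import asyncio

    async def fake_predict(prompt, delay):
        # Stand-in for one inpainting request; the await is what lets tasks overlap.
        await asyncio.sleep(delay)
        return f"result for {prompt!r}"

    async def fan_out():
        # Same shape as predicts(): schedule both tasks, wait for both, collect results.
        task1 = asyncio.create_task(fake_predict("old bridge, mountain, grass", 2))
        task2 = asyncio.create_task(fake_predict("sunflowers, old bridge, mountain, grass", 2))
        await task1
        await task2
        return task1.result(), task2.result()

    if __name__ == "__main__":
        img1, img2 = asyncio.run(fan_out())
        print(img1, img2)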
fonts/arial.ttf
ADDED
Binary file (917 kB).
templates/template1.png
ADDED
templates/template2.png
ADDED
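The two template PNGs are overlaid on each generated 512x512 image by add_template, which passes the template as its own paste mask (so the files are expected to carry an alpha channel), and draw_text then writes the ad copy with the bundled fonts/arial.ttf. A rough stand-alone sketch of that composition step follows; compose_ad and its file-path arguments are hypothetical names used only for illustration, while the coordinates, colors, and font sizes mirror the values in app.py.

    from PIL import Image, ImageDraw, ImageFont

    def compose_ad(base_path, template_path, headline, subline):
        # Bring both layers to the 512x512 working size used in app.py.
        base = Image.open(base_path).convert("RGB").resize((512, 512))
        template = Image.open(template_path).convert("RGBA").resize((512, 512))

        # Paste the template over the image, using it as its own mask so the
        # transparent parts of the PNG stay see-through.
        base.paste(template, (0, 0), mask=template)

        # Draw the two text lines with the bundled font, as draw_text() does.
        draw = ImageDraw.Draw(base)
        draw.text((5, 300), headline, fill=(255, 0, 0), font=ImageFont.truetype("fonts/arial.ttf", 18))
        draw.text((15, 400), subline, fill=(255, 0, 0), font=ImageFont.truetype("fonts/arial.ttf", 14))
        return base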