sachin committed
Commit 998f798 · 1 Parent(s): 950f17d

merge code

Files changed (4)
  1. Dockerfile +1 -1
  2. intruct.py +212 -30
  3. merged_code.py +461 -0
  4. requirements.txt +3 -1
Dockerfile CHANGED
@@ -35,4 +35,4 @@ USER appuser
  EXPOSE 7860

  # Run the server
- CMD ["python", "/app/intruct.py"]
+ CMD ["python", "/app/merged_code.py"]
intruct.py CHANGED
@@ -15,10 +15,32 @@ from io import BytesIO
  import os
  import base64
  from typing import List
+ from fastapi import FastAPI, File, UploadFile, HTTPException
+ from fastapi.responses import StreamingResponse
+ from PIL import Image, ImageDraw, ImageFilter
+ import io
+ import torch
+ import numpy as np
+ from diffusers import StableDiffusionInpaintPipeline
+ import cv2
+
+

  # Initialize FastAPI app
  app = FastAPI()

+
+ model_id_runway = "runwayml/stable-diffusion-inpainting"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ try:
+     pipe_runway = StableDiffusionInpaintPipeline.from_pretrained(model_id_runway)
+     pipe_runway.to(device)
+ except Exception as e:
+     raise RuntimeError(f"Failed to load model: {e}")
+
+
+
  # Load the pre-trained InstructPix2Pix model for editing
  model_id = "timbrooks/instruct-pix2pix"
  pipe_edit = StableDiffusionInstructPix2PixPipeline.from_pretrained(
@@ -101,36 +123,6 @@ async def generate_image(prompt: str):
      except Exception as e:
          return {"error": str(e)}

- @app.get("/generate_multiple")
- async def generate_multiple_images(prompts: List[str]):
-     try:
-         # List to store base64-encoded images
-         generated_images = []
-
-         # Generate an image for each prompt
-         for prompt in prompts:
-             image = pipe_generate(
-                 prompt,
-                 num_inference_steps=4,
-                 guidance_scale=0
-             ).images[0]
-
-             # Save image to buffer
-             buffer = BytesIO()
-             image.save(buffer, format="PNG")
-             buffer.seek(0)
-
-             # Encode the image as base64
-             image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
-             generated_images.append({
-                 "prompt": prompt,
-                 "image_base64": image_base64
-             })
-
-         return {"images": generated_images}
-
-     except Exception as e:
-         return {"error": str(e)}

  @app.get("/health")
  async def health_check():
@@ -274,6 +266,196 @@ async def root():
      """
      return {"message": "InstructPix2Pix API is running. Use POST /edit-image/ or /inpaint/ to edit images."}

+
+
+ # Helper functions
+ def prepare_guided_image(original_image: Image, reference_image: Image, mask_image: Image) -> Image:
+     original_array = np.array(original_image)
+     reference_array = np.array(reference_image)
+     mask_array = np.array(mask_image) / 255.0
+     mask_array = mask_array[:, :, np.newaxis]
+     blended_array = original_array * (1 - mask_array) + reference_array * mask_array
+     return Image.fromarray(blended_array.astype(np.uint8))
+
+ def soften_mask(mask_image: Image, softness: int = 5) -> Image:
+     from PIL import ImageFilter
+     return mask_image.filter(ImageFilter.GaussianBlur(radius=softness))
+
+ def generate_rectangular_mask(image_size: tuple, x1: int = 100, y1: int = 100, x2: int = 200, y2: int = 200) -> Image:
+     mask = Image.new("L", image_size, 0)
+     draw = ImageDraw.Draw(mask)
+     draw.rectangle([x1, y1, x2, y2], fill=255)
+     return mask
+
+ def segment_tank(tank_image: Image) -> tuple[Image, Image]:
+     tank_array = np.array(tank_image.convert("RGB"))
+     tank_array = cv2.cvtColor(tank_array, cv2.COLOR_RGB2BGR)
+     hsv = cv2.cvtColor(tank_array, cv2.COLOR_BGR2HSV)
+     lower_snow = np.array([0, 0, 180])
+     upper_snow = np.array([180, 50, 255])
+     snow_mask = cv2.inRange(hsv, lower_snow, upper_snow)
+     tank_mask = cv2.bitwise_not(snow_mask)
+     kernel = np.ones((5, 5), np.uint8)
+     tank_mask = cv2.erode(tank_mask, kernel, iterations=1)
+     tank_mask = cv2.dilate(tank_mask, kernel, iterations=1)
+     tank_mask_image = Image.fromarray(tank_mask, mode="L")
+     tank_array_rgb = np.array(tank_image.convert("RGB"))
+     mask_array = tank_mask / 255.0
+     mask_array = mask_array[:, :, np.newaxis]
+     segmented_tank = (tank_array_rgb * mask_array).astype(np.uint8)
+     alpha = tank_mask
+     segmented_tank_rgba = np.zeros((tank_image.height, tank_image.width, 4), dtype=np.uint8)
+     segmented_tank_rgba[:, :, :3] = segmented_tank
+     segmented_tank_rgba[:, :, 3] = alpha
+     segmented_tank_image = Image.fromarray(segmented_tank_rgba, mode="RGBA")
+     return segmented_tank_image, tank_mask_image
+
+ async def apply_camouflage_to_tank(tank_image: Image) -> Image:
+     segmented_tank, tank_mask = segment_tank(tank_image)
+     segmented_tank.save("segmented_tank.png")
+     tank_mask.save("tank_mask.png")
+     camouflaged_tank = pipe_runway(
+         prompt="Apply a grassy camouflage pattern with shades of green and brown to the tank, preserving its structure.",
+         image=segmented_tank.convert("RGB"),
+         mask_image=tank_mask,
+         strength=0.5,
+         guidance_scale=8.0,
+         num_inference_steps=50,
+         negative_prompt="snow, ice, rock, stone, boat, unrelated objects"
+     ).images[0]
+     camouflaged_tank_rgba = np.zeros((camouflaged_tank.height, camouflaged_tank.width, 4), dtype=np.uint8)
+     camouflaged_tank_rgba[:, :, :3] = np.array(camouflaged_tank)
+     camouflaged_tank_rgba[:, :, 3] = np.array(tank_mask)
+     camouflaged_tank_image = Image.fromarray(camouflaged_tank_rgba, mode="RGBA")
+     camouflaged_tank_image.save("camouflaged_tank.png")
+     return camouflaged_tank_image
+
+ def fit_image_to_mask(original_image: Image, reference_image: Image, mask_x1: int, mask_y1: int, mask_x2: int, mask_y2: int) -> tuple:
+     mask_width = mask_x2 - mask_x1
+     mask_height = mask_y2 - mask_y1
+     if mask_width <= 0 or mask_height <= 0:
+         raise ValueError("Mask dimensions must be positive")
+     ref_width, ref_height = reference_image.size
+     aspect_ratio = ref_width / ref_height
+     if mask_width / mask_height > aspect_ratio:
+         new_height = mask_height
+         new_width = int(new_height * aspect_ratio)
+     else:
+         new_width = mask_width
+         new_height = int(new_width / aspect_ratio)
+     reference_image_resized = reference_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+     guided_image = original_image.copy().convert("RGB")
+     paste_x = mask_x1 + (mask_width - new_width) // 2
+     paste_y = mask_y1 + (mask_height - new_height) // 2
+     guided_image.paste(reference_image_resized, (paste_x, paste_y), reference_image_resized)
+     mask_image = generate_rectangular_mask(original_image.size, mask_x1, mask_y1, mask_x2, mask_y2)
+     return guided_image, mask_image
+
+ # Endpoints
+ @app.post("/inpaint/")
+ async def inpaint_image(
+     image: UploadFile = File(...),
+     mask: UploadFile = File(...),
+     prompt: str = "Fill the masked area with appropriate content."
+ ):
+     try:
+         image_bytes = await image.read()
+         mask_bytes = await mask.read()
+         original_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+         mask_image = Image.open(io.BytesIO(mask_bytes)).convert("L")
+         if original_image.size != mask_image.size:
+             raise HTTPException(status_code=400, detail="Image and mask dimensions must match.")
+         result = pipe_runway(prompt=prompt, image=original_image, mask_image=mask_image).images[0]
+         result_bytes = io.BytesIO()
+         result.save(result_bytes, format="PNG")
+         result_bytes.seek(0)
+         return StreamingResponse(
+             result_bytes,
+             media_type="image/png",
+             headers={"Content-Disposition": "attachment; filename=inpainted_image.png"}
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error during inpainting: {e}")
+
+ @app.post("/inpaint-with-reference/")
+ async def inpaint_with_reference(
+     image: UploadFile = File(...),
+     reference_image: UploadFile = File(...),
+     prompt: str = "Integrate the reference content naturally into the masked area, matching style and lighting.",
+     mask_x1: int = 100,
+     mask_y1: int = 100,
+     mask_x2: int = 200,
+     mask_y2: int = 200
+ ):
+     try:
+         image_bytes = await image.read()
+         reference_bytes = await reference_image.read()
+         original_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+         reference_image = Image.open(io.BytesIO(reference_bytes)).convert("RGB")
+         if original_image.size != reference_image.size:
+             reference_image = reference_image.resize(original_image.size, Image.Resampling.LANCZOS)
+         mask_image = generate_rectangular_mask(original_image.size, mask_x1, mask_y1, mask_x2, mask_y2)
+         softened_mask = soften_mask(mask_image, softness=5)
+         guided_image = prepare_guided_image(original_image, reference_image, softened_mask)
+         result = pipe_runway(
+             prompt=prompt,
+             image=guided_image,
+             mask_image=softened_mask,
+             strength=0.75,
+             guidance_scale=7.5
+         ).images[0]
+         result_bytes = io.BytesIO()
+         result.save(result_bytes, format="PNG")
+         result_bytes.seek(0)
+         return StreamingResponse(
+             result_bytes,
+             media_type="image/png",
+             headers={"Content-Disposition": "attachment; filename=natural_inpaint_image.png"}
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error during natural inpainting: {e}")
+
+ @app.post("/fit-image-to-mask/")
+ async def fit_image_to_mask_endpoint(
+     image: UploadFile = File(...),
+     reference_image: UploadFile = File(...),
+     mask_x1: int = 200,
+     mask_y1: int = 200,
+     mask_x2: int = 500,
+     mask_y2: int = 500
+ ):
+     try:
+         image_bytes = await image.read()
+         reference_bytes = await reference_image.read()
+         original_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+         reference_image = Image.open(io.BytesIO(reference_bytes)).convert("RGB")
+         camouflaged_tank = await apply_camouflage_to_tank(reference_image)
+         guided_image, mask_image = fit_image_to_mask(original_image, camouflaged_tank, mask_x1, mask_y1, mask_x2, mask_y2)
+         guided_image.save("guided_image_before_blending.png")
+         softened_mask = soften_mask(mask_image, softness=2)
+         result = pipe_runway(
+             prompt="Blend the camouflaged tank into the grassy field with trees, ensuring a non-snowy environment, matching the style, lighting, and surroundings.",
+             image=guided_image,
+             mask_image=softened_mask,
+             strength=0.2,
+             guidance_scale=7.5,
+             num_inference_steps=50,
+             negative_prompt="snow, ice, rock, stone, boat, unrelated objects"
+         ).images[0]
+         result_bytes = io.BytesIO()
+         result.save(result_bytes, format="PNG")
+         result_bytes.seek(0)
+         return StreamingResponse(
+             result_bytes,
+             media_type="image/png",
+             headers={"Content-Disposition": "attachment; filename=fitted_image.png"}
+         )
+     except ValueError as ve:
+         raise HTTPException(status_code=400, detail=f"ValueError in processing: {str(ve)}")
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error during fitting and inpainting: {str(e)}")
+
+
  if __name__ == "__main__":
      import uvicorn
      uvicorn.run(app, host="0.0.0.0", port=7860)
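The three routes added here take their images as multipart uploads, while prompt and the mask corners are declared as plain defaulted parameters, so FastAPI reads them from the query string. A client sketch for /inpaint-with-reference/, assuming a local instance on port 7860 and hypothetical file names:

    import requests

    url = "http://localhost:7860/inpaint-with-reference/"  # route added in this commit
    files = {
        "image": ("scene.png", open("scene.png", "rb"), "image/png"),            # hypothetical input
        "reference_image": ("patch.png", open("patch.png", "rb"), "image/png"),  # hypothetical reference
    }
    # Non-Form scalar parameters become query parameters in FastAPI.
    params = {"prompt": "blend the reference into the scene",
              "mask_x1": 100, "mask_y1": 100, "mask_x2": 200, "mask_y2": 200}

    resp = requests.post(url, files=files, params=params, timeout=600)
    resp.raise_for_status()
    with open("natural_inpaint_image.png", "wb") as f:
        f.write(resp.content)  # the server streams back a PNG attachment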
merged_code.py ADDED
@@ -0,0 +1,461 @@
+ from fastapi import FastAPI, File, UploadFile, Form
+ from fastapi.responses import StreamingResponse
+ import io
+ import math
+ from PIL import Image, ImageOps, ImageDraw
+ import torch
+ from diffusers import StableDiffusionInstructPix2PixPipeline, StableDiffusionInpaintPipeline
+ from fastapi import FastAPI, Response
+ from fastapi.responses import FileResponse
+ import torch
+ from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
+ from huggingface_hub import hf_hub_download, login
+ from safetensors.torch import load_file
+ from io import BytesIO
+ import os
+ import base64
+ from typing import List
+ from fastapi import FastAPI, File, UploadFile, HTTPException
+ from fastapi.responses import StreamingResponse
+ from PIL import Image, ImageDraw, ImageFilter
+ import io
+ import torch
+ import numpy as np
+ from diffusers import StableDiffusionInpaintPipeline
+ import cv2
+
+
+
+ # Initialize FastAPI app
+ app = FastAPI()
+
+
+ model_id_runway = "runwayml/stable-diffusion-inpainting"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ try:
+     pipe_runway = StableDiffusionInpaintPipeline.from_pretrained(model_id_runway)
+     pipe_runway.to(device)
+ except Exception as e:
+     raise RuntimeError(f"Failed to load model: {e}")
+
+
+
+ # Load the pre-trained InstructPix2Pix model for editing
+ model_id = "timbrooks/instruct-pix2pix"
+ pipe_edit = StableDiffusionInstructPix2PixPipeline.from_pretrained(
+     model_id, torch_dtype=torch.float16, safety_checker=None
+ ).to("cuda")
+
+ # Load the pre-trained Inpainting model
+ inpaint_model_id = "stabilityai/stable-diffusion-2-inpainting"
+ pipe_inpaint = StableDiffusionInpaintPipeline.from_pretrained(
+     inpaint_model_id, torch_dtype=torch.float16, safety_checker=None
+ ).to("cuda")
+
+ # Default configuration values
+ DEFAULT_STEPS = 50
+ DEFAULT_TEXT_CFG = 7.5
+ DEFAULT_IMAGE_CFG = 1.5
+ DEFAULT_SEED = 1371
+
+ HF_TOKEN = os.getenv("HF_TOKEN")
+
+ def load_model():
+     try:
+         # Login to Hugging Face if token is provided
+         if HF_TOKEN:
+             login(token=HF_TOKEN)
+
+         base = "stabilityai/stable-diffusion-xl-base-1.0"
+         repo = "ByteDance/SDXL-Lightning"
+         ckpt = "sdxl_lightning_4step_unet.safetensors"
+
+         # Load model with explicit error handling
+         unet = UNet2DConditionModel.from_config(
+             base,
+             subfolder="unet"
+         ).to("cuda", torch.float16)
+
+         unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device="cuda"))
+         pipe = StableDiffusionXLPipeline.from_pretrained(
+             base,
+             unet=unet,
+             torch_dtype=torch.float16,
+             variant="fp16"
+         ).to("cuda")
+
+         # Configure scheduler
+         pipe.scheduler = EulerDiscreteScheduler.from_config(
+             pipe.scheduler.config,
+             timestep_spacing="trailing"
+         )
+
+         return pipe
+
+     except Exception as e:
+         raise Exception(f"Failed to load model: {str(e)}")
+
+ # Load model at startup with error handling
+ try:
+     pipe_generate = load_model()
+ except Exception as e:
+     print(f"Model initialization failed: {str(e)}")
+     raise
+
+ @app.get("/generate")
+ async def generate_image(prompt: str):
+     try:
+         # Generate image
+         image = pipe_generate(
+             prompt,
+             num_inference_steps=4,
+             guidance_scale=0
+         ).images[0]
+
+         # Save image to buffer
+         buffer = BytesIO()
+         image.save(buffer, format="PNG")
+         buffer.seek(0)
+
+         return Response(content=buffer.getvalue(), media_type="image/png")
+
+     except Exception as e:
+         return {"error": str(e)}
+
+
+ @app.get("/health")
+ async def health_check():
+     return {"status": "healthy"}
+
+ def process_image(input_image: Image.Image, instruction: str, steps: int, text_cfg_scale: float, image_cfg_scale: float, seed: int):
+     """
+     Process the input image with the given instruction using InstructPix2Pix.
+     """
+     # Resize image to fit model requirements
+     width, height = input_image.size
+     factor = 512 / max(width, height)
+     factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
+     width = int((width * factor) // 64) * 64
+     height = int((height * factor) // 64) * 64
+     input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS)
+
+     if not instruction:
+         return input_image
+
+     # Set the random seed for reproducibility
+     generator = torch.manual_seed(seed)
+
+     # Generate the edited image
+     edited_image = pipe_edit(
+         instruction,
+         image=input_image,
+         guidance_scale=text_cfg_scale,
+         image_guidance_scale=image_cfg_scale,
+         num_inference_steps=steps,
+         generator=generator,
+     ).images[0]
+
+     return edited_image
+
+ @app.post("/edit-image/")
+ async def edit_image(
+     file: UploadFile = File(...),
+     instruction: str = Form(...),
+     steps: int = Form(default=DEFAULT_STEPS),
+     text_cfg_scale: float = Form(default=DEFAULT_TEXT_CFG),
+     image_cfg_scale: float = Form(default=DEFAULT_IMAGE_CFG),
+     seed: int = Form(default=DEFAULT_SEED)
+ ):
+     """
+     Endpoint to edit an image based on a text instruction.
+     """
+     # Read and convert the uploaded image
+     image_data = await file.read()
+     input_image = Image.open(io.BytesIO(image_data)).convert("RGB")
+
+     # Process the image
+     edited_image = process_image(input_image, instruction, steps, text_cfg_scale, image_cfg_scale, seed)
+
+     # Convert the edited image to bytes
+     img_byte_arr = io.BytesIO()
+     edited_image.save(img_byte_arr, format="PNG")
+     img_byte_arr.seek(0)
+
+     # Return the image as a streaming response
+     return StreamingResponse(img_byte_arr, media_type="image/png")
+
+ # New endpoint for inpainting
+ @app.post("/inpaint/")
+ async def inpaint_image(
+     file: UploadFile = File(...),
+     prompt: str = Form(...),
+     mask_coordinates: str = Form(...),  # Format: "x1,y1,x2,y2" (top-left and bottom-right of the rectangle to inpaint)
+     steps: int = Form(default=DEFAULT_STEPS),
+     guidance_scale: float = Form(default=7.5),
+     seed: int = Form(default=DEFAULT_SEED)
+ ):
+     """
+     Endpoint to perform inpainting on an image.
+     - file: The input image to inpaint.
+     - prompt: The text prompt describing what to generate in the inpainted area.
+     - mask_coordinates: Coordinates of the rectangular area to inpaint (format: "x1,y1,x2,y2").
+     - steps: Number of inference steps.
+     - guidance_scale: Guidance scale for the inpainting process.
+     - seed: Random seed for reproducibility.
+     """
+     try:
+         # Read and convert the uploaded image
+         image_data = await file.read()
+         input_image = Image.open(io.BytesIO(image_data)).convert("RGB")
+
+         # Resize image to fit model requirements (must be divisible by 8 for inpainting)
+         width, height = input_image.size
+         factor = 512 / max(width, height)
+         factor = math.ceil(min(width, height) * factor / 8) * 8 / min(width, height)
+         width = int((width * factor) // 8) * 8
+         height = int((height * factor) // 8) * 8
+         input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS)
+
+         # Create a mask for inpainting
+         mask = Image.new("L", (width, height), 0)  # Black image (0 = no inpainting)
+         draw = ImageDraw.Draw(mask)
+
+         # Parse the mask coordinates
+         try:
+             x1, y1, x2, y2 = map(int, mask_coordinates.split(","))
+             # Adjust coordinates based on resized image
+             x1 = int(x1 * factor)
+             y1 = int(y1 * factor)
+             x2 = int(x2 * factor)
+             y2 = int(y2 * factor)
+         except ValueError:
+             return {"error": "Invalid mask coordinates format. Use 'x1,y1,x2,y2'."}
+
+         # Draw a white rectangle on the mask (255 = area to inpaint)
+         draw.rectangle([x1, y1, x2, y2], fill=255)
+
+         # Set the random seed for reproducibility
+         generator = torch.manual_seed(seed)
+
+         # Perform inpainting
+         inpainted_image = pipe_inpaint(
+             prompt=prompt,
+             image=input_image,
+             mask_image=mask,
+             num_inference_steps=steps,
+             guidance_scale=guidance_scale,
+             generator=generator,
+         ).images[0]
+
+         # Convert the inpainted image to bytes
+         img_byte_arr = io.BytesIO()
+         inpainted_image.save(img_byte_arr, format="PNG")
+         img_byte_arr.seek(0)
+
+         # Return the image as a streaming response
+         return StreamingResponse(img_byte_arr, media_type="image/png")
+
+     except Exception as e:
+         return {"error": str(e)}
+
+ @app.get("/")
+ async def root():
+     """
+     Root endpoint for basic health check.
+     """
+     return {"message": "InstructPix2Pix API is running. Use POST /edit-image/ or /inpaint/ to edit images."}
+
+
+
+ # Helper functions
+ def prepare_guided_image(original_image: Image, reference_image: Image, mask_image: Image) -> Image:
+     original_array = np.array(original_image)
+     reference_array = np.array(reference_image)
+     mask_array = np.array(mask_image) / 255.0
+     mask_array = mask_array[:, :, np.newaxis]
+     blended_array = original_array * (1 - mask_array) + reference_array * mask_array
+     return Image.fromarray(blended_array.astype(np.uint8))
+
+ def soften_mask(mask_image: Image, softness: int = 5) -> Image:
+     from PIL import ImageFilter
+     return mask_image.filter(ImageFilter.GaussianBlur(radius=softness))
+
+ def generate_rectangular_mask(image_size: tuple, x1: int = 100, y1: int = 100, x2: int = 200, y2: int = 200) -> Image:
+     mask = Image.new("L", image_size, 0)
+     draw = ImageDraw.Draw(mask)
+     draw.rectangle([x1, y1, x2, y2], fill=255)
+     return mask
+
+ def segment_tank(tank_image: Image) -> tuple[Image, Image]:
+     tank_array = np.array(tank_image.convert("RGB"))
+     tank_array = cv2.cvtColor(tank_array, cv2.COLOR_RGB2BGR)
+     hsv = cv2.cvtColor(tank_array, cv2.COLOR_BGR2HSV)
+     lower_snow = np.array([0, 0, 180])
+     upper_snow = np.array([180, 50, 255])
+     snow_mask = cv2.inRange(hsv, lower_snow, upper_snow)
+     tank_mask = cv2.bitwise_not(snow_mask)
+     kernel = np.ones((5, 5), np.uint8)
+     tank_mask = cv2.erode(tank_mask, kernel, iterations=1)
+     tank_mask = cv2.dilate(tank_mask, kernel, iterations=1)
+     tank_mask_image = Image.fromarray(tank_mask, mode="L")
+     tank_array_rgb = np.array(tank_image.convert("RGB"))
+     mask_array = tank_mask / 255.0
+     mask_array = mask_array[:, :, np.newaxis]
+     segmented_tank = (tank_array_rgb * mask_array).astype(np.uint8)
+     alpha = tank_mask
+     segmented_tank_rgba = np.zeros((tank_image.height, tank_image.width, 4), dtype=np.uint8)
+     segmented_tank_rgba[:, :, :3] = segmented_tank
+     segmented_tank_rgba[:, :, 3] = alpha
+     segmented_tank_image = Image.fromarray(segmented_tank_rgba, mode="RGBA")
+     return segmented_tank_image, tank_mask_image
+
+ async def apply_camouflage_to_tank(tank_image: Image) -> Image:
+     segmented_tank, tank_mask = segment_tank(tank_image)
+     segmented_tank.save("segmented_tank.png")
+     tank_mask.save("tank_mask.png")
+     camouflaged_tank = pipe_runway(
+         prompt="Apply a grassy camouflage pattern with shades of green and brown to the tank, preserving its structure.",
+         image=segmented_tank.convert("RGB"),
+         mask_image=tank_mask,
+         strength=0.5,
+         guidance_scale=8.0,
+         num_inference_steps=50,
+         negative_prompt="snow, ice, rock, stone, boat, unrelated objects"
+     ).images[0]
+     camouflaged_tank_rgba = np.zeros((camouflaged_tank.height, camouflaged_tank.width, 4), dtype=np.uint8)
+     camouflaged_tank_rgba[:, :, :3] = np.array(camouflaged_tank)
+     camouflaged_tank_rgba[:, :, 3] = np.array(tank_mask)
+     camouflaged_tank_image = Image.fromarray(camouflaged_tank_rgba, mode="RGBA")
+     camouflaged_tank_image.save("camouflaged_tank.png")
+     return camouflaged_tank_image
+
+ def fit_image_to_mask(original_image: Image, reference_image: Image, mask_x1: int, mask_y1: int, mask_x2: int, mask_y2: int) -> tuple:
+     mask_width = mask_x2 - mask_x1
+     mask_height = mask_y2 - mask_y1
+     if mask_width <= 0 or mask_height <= 0:
+         raise ValueError("Mask dimensions must be positive")
+     ref_width, ref_height = reference_image.size
+     aspect_ratio = ref_width / ref_height
+     if mask_width / mask_height > aspect_ratio:
+         new_height = mask_height
+         new_width = int(new_height * aspect_ratio)
+     else:
+         new_width = mask_width
+         new_height = int(new_width / aspect_ratio)
+     reference_image_resized = reference_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+     guided_image = original_image.copy().convert("RGB")
+     paste_x = mask_x1 + (mask_width - new_width) // 2
+     paste_y = mask_y1 + (mask_height - new_height) // 2
+     guided_image.paste(reference_image_resized, (paste_x, paste_y), reference_image_resized)
+     mask_image = generate_rectangular_mask(original_image.size, mask_x1, mask_y1, mask_x2, mask_y2)
+     return guided_image, mask_image
+
+ # Endpoints
+ @app.post("/inpaint/")
+ async def inpaint_image(
+     image: UploadFile = File(...),
+     mask: UploadFile = File(...),
+     prompt: str = "Fill the masked area with appropriate content."
+ ):
+     try:
+         image_bytes = await image.read()
+         mask_bytes = await mask.read()
+         original_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+         mask_image = Image.open(io.BytesIO(mask_bytes)).convert("L")
+         if original_image.size != mask_image.size:
+             raise HTTPException(status_code=400, detail="Image and mask dimensions must match.")
+         result = pipe_runway(prompt=prompt, image=original_image, mask_image=mask_image).images[0]
+         result_bytes = io.BytesIO()
+         result.save(result_bytes, format="PNG")
+         result_bytes.seek(0)
+         return StreamingResponse(
+             result_bytes,
+             media_type="image/png",
+             headers={"Content-Disposition": "attachment; filename=inpainted_image.png"}
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error during inpainting: {e}")
+
+ @app.post("/inpaint-with-reference/")
+ async def inpaint_with_reference(
+     image: UploadFile = File(...),
+     reference_image: UploadFile = File(...),
+     prompt: str = "Integrate the reference content naturally into the masked area, matching style and lighting.",
+     mask_x1: int = 100,
+     mask_y1: int = 100,
+     mask_x2: int = 200,
+     mask_y2: int = 200
+ ):
+     try:
+         image_bytes = await image.read()
+         reference_bytes = await reference_image.read()
+         original_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+         reference_image = Image.open(io.BytesIO(reference_bytes)).convert("RGB")
+         if original_image.size != reference_image.size:
+             reference_image = reference_image.resize(original_image.size, Image.Resampling.LANCZOS)
+         mask_image = generate_rectangular_mask(original_image.size, mask_x1, mask_y1, mask_x2, mask_y2)
+         softened_mask = soften_mask(mask_image, softness=5)
+         guided_image = prepare_guided_image(original_image, reference_image, softened_mask)
+         result = pipe_runway(
+             prompt=prompt,
+             image=guided_image,
+             mask_image=softened_mask,
+             strength=0.75,
+             guidance_scale=7.5
+         ).images[0]
+         result_bytes = io.BytesIO()
+         result.save(result_bytes, format="PNG")
+         result_bytes.seek(0)
+         return StreamingResponse(
+             result_bytes,
+             media_type="image/png",
+             headers={"Content-Disposition": "attachment; filename=natural_inpaint_image.png"}
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error during natural inpainting: {e}")
+
+ @app.post("/fit-image-to-mask/")
+ async def fit_image_to_mask_endpoint(
+     image: UploadFile = File(...),
+     reference_image: UploadFile = File(...),
+     mask_x1: int = 200,
+     mask_y1: int = 200,
+     mask_x2: int = 500,
+     mask_y2: int = 500
+ ):
+     try:
+         image_bytes = await image.read()
+         reference_bytes = await reference_image.read()
+         original_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+         reference_image = Image.open(io.BytesIO(reference_bytes)).convert("RGB")
+         camouflaged_tank = await apply_camouflage_to_tank(reference_image)
+         guided_image, mask_image = fit_image_to_mask(original_image, camouflaged_tank, mask_x1, mask_y1, mask_x2, mask_y2)
+         guided_image.save("guided_image_before_blending.png")
+         softened_mask = soften_mask(mask_image, softness=2)
+         result = pipe_runway(
+             prompt="Blend the camouflaged tank into the grassy field with trees, ensuring a non-snowy environment, matching the style, lighting, and surroundings.",
+             image=guided_image,
+             mask_image=softened_mask,
+             strength=0.2,
+             guidance_scale=7.5,
+             num_inference_steps=50,
+             negative_prompt="snow, ice, rock, stone, boat, unrelated objects"
+         ).images[0]
+         result_bytes = io.BytesIO()
+         result.save(result_bytes, format="PNG")
+         result_bytes.seek(0)
+         return StreamingResponse(
+             result_bytes,
+             media_type="image/png",
+             headers={"Content-Disposition": "attachment; filename=fitted_image.png"}
+         )
+     except ValueError as ve:
+         raise HTTPException(status_code=400, detail=f"ValueError in processing: {str(ve)}")
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error during fitting and inpainting: {str(e)}")
+
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
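process_image above scales the long side of the input toward 512 and then snaps both dimensions to multiples of 64 (the /inpaint/ route does the same with a stride of 8), presumably because the pipelines want dimensions divisible by those strides. A standalone sketch reproducing that arithmetic from the file above:

    import math

    def snapped_size(width: int, height: int) -> tuple[int, int]:
        # Scale so the long side is about 512, round the short side up to a
        # multiple of 64, then truncate both sides down to multiples of 64.
        factor = 512 / max(width, height)
        factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
        return int((width * factor) // 64) * 64, int((height * factor) // 64) * 64

    print(snapped_size(1024, 768))  # -> (512, 384)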
requirements.txt CHANGED
@@ -8,4 +8,6 @@ transformers
  pillow
  numpy
  blenderproc
- python-multipart
+ python-multipart
+ opencv-python
+ opencv-python-headless
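The opencv packages are pulled in for the HSV thresholding that segment_tank uses to separate the subject from a snowy background (opencv-python-headless alone is typically enough in a container, since both wheels provide the same cv2 module). A minimal standalone sketch of that thresholding step on a synthetic image, using the same bounds as the code above:

    import cv2
    import numpy as np

    # Synthetic 2x2 BGR image: one near-white "snow" pixel, three dark pixels.
    bgr = np.array([[[250, 250, 250], [30, 40, 20]],
                    [[25, 35, 15], [28, 33, 22]]], dtype=np.uint8)

    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    # Same bounds as segment_tank: low saturation plus high value = snow/white.
    snow_mask = cv2.inRange(hsv, np.array([0, 0, 180]), np.array([180, 50, 255]))
    tank_mask = cv2.bitwise_not(snow_mask)  # keep everything that is not snow
    print(snow_mask)  # 255 at the near-white pixel, 0 elsewhere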