Pushpanjali committed
Commit bce64d2 · 1 Parent(s): 97b6676

adding files

Files changed (1)
  1. yolo_functions.py +555 -0
yolo_functions.py ADDED
@@ -0,0 +1,555 @@
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random
import hashlib
import os
import easyocr
import pytesseract

from ultralytics import YOLO

# 1. Load a YOLOv8 segmentation model (pre-trained weights)
model = YOLO("best.pt")


def get_label_color_id(label_id):
    """
    Generate a consistent color for a numeric label_id by hashing the ID.
    This ensures that each numeric ID always maps to the same color.
    """
    label_str = str(int(label_id))
    # Use the MD5 hash of the label string as a seed
    seed_value = int(hashlib.md5(label_str.encode('utf-8')).hexdigest(), 16)
    random.seed(seed_value)
    # Return a 3-channel color tuple
    return (
        random.randint(50, 255),
        random.randint(50, 255),
        random.randint(50, 255)
    )

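# Example (sketch, not part of the original commit): because the RNG is seeded
# from the MD5 hash of the label, the same ID always yields the same color:
#
#   get_label_color_id(3) == get_label_color_id(3)   # True, deterministic per label
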
def segment_large_image_with_tiles(
    model,
    large_image_path,
    tile_size=1080,
    overlap=60,   # Overlap in pixels
    alpha=0.4,
    display=True
):
    """
    1. Reads a large image from `large_image_path`.
    2. Tiles it into sub-images of size `tile_size` x `tile_size`,
       stepping by (tile_size - overlap) to have overlap regions.
    3. Runs `model.predict()` on each tile and accumulates all polygons (in global coords).
    4. For each class, merges overlapping polygons by:
       - filling them on a single-channel mask
       - finding final contours of the connected regions
    5. Draws merged polygons onto an overlay and alpha-blends with the original image.
    6. Returns the final annotated image (in RGB) and a dictionary of merged contours.
    """

    # Read the large image
    image_bgr = cv2.imread(large_image_path)
    if image_bgr is None:
        raise ValueError(f"Could not load image from {large_image_path}")

    # Convert to RGB (for plotting consistency)
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    H, W, _ = image_rgb.shape

    # Dictionary to store raw polygon coords for each class (before merging)
    class_mask_dict = {}

    # Step size with overlap
    step = tile_size - overlap if overlap < tile_size else tile_size

    # ------------------------
    # 1) Perform Tiled Inference
    # ------------------------
    for top in range(0, H, step):
        for left in range(0, W, step):
            bottom = min(top + tile_size, H)
            right = min(left + tile_size, W)

            tile_rgb = image_rgb[top:bottom, left:right]

            # Run YOLOv8 model prediction
            results = model.predict(tile_rgb)
            if len(results) == 0:
                continue

            # Typically, results[0] holds the main predictions
            pred = results[0]

            # Check if we have valid masks
            if (pred.masks is None) or (pred.masks.xy is None):
                continue

            tile_masks_xy = pred.masks.xy   # list of polygon coords
            tile_labels = pred.boxes.cls    # list of class IDs

            # Convert to numpy int if needed
            if hasattr(tile_labels, 'cpu'):
                tile_labels = tile_labels.cpu().numpy()
            tile_labels = tile_labels.astype(int).tolist()

            # Accumulate polygon coords in global space
            for label_id, polygon in zip(tile_labels, tile_masks_xy):
                # Convert polygon float coords to int points in shape (N, 1, 2)
                polygon_pts = polygon.reshape((-1, 1, 2)).astype(np.int32)

                # Offset the polygon to the large image coords
                polygon_pts[:, 0, 0] += left   # x-offset
                polygon_pts[:, 0, 1] += top    # y-offset

                if label_id not in class_mask_dict:
                    class_mask_dict[label_id] = []
                class_mask_dict[label_id].append(polygon_pts)

    # -----------------------------------------
    # 2) Merge Overlapping Polygons For Each Class
    #    by rasterizing them in a mask and then
    #    finding final contours
    # -----------------------------------------
    merged_class_mask_dict = {}
    for label_id, polygons_cv in class_mask_dict.items():
        # Create a blank mask (single channel) for the entire image
        mask = np.zeros((H, W), dtype=np.uint8)

        # Fill all polygons for this label on the mask
        for pts in polygons_cv:
            cv2.fillPoly(mask, [pts], 255)

        # Now findContours to get merged regions
        # Use RETR_EXTERNAL so we just get outer boundaries of each connected region
        contours, _ = cv2.findContours(
            mask,
            mode=cv2.RETR_EXTERNAL,
            method=cv2.CHAIN_APPROX_SIMPLE
        )

        # Store final merged contours
        merged_class_mask_dict[label_id] = contours

    # -----------------------
    # 3) Draw Merged Polygons
    # -----------------------
    overlay = image_rgb.copy()
    for label_id, contours in merged_class_mask_dict.items():
        color_bgr = get_label_color_id(label_id)
        for cnt in contours:
            # Fill each contour on the overlay
            cv2.fillPoly(overlay, [cnt], color_bgr)

    # 4) Alpha blend
    output = cv2.addWeighted(overlay, alpha, image_rgb, 1 - alpha, 0)

    # 5) Optional Display
    if display:
        plt.figure(figsize=(12, 12))
        plt.imshow(output)
        plt.axis('off')
        plt.title("Segmentation on Large Image (Overlapped Tiles + Merged Polygons)")
        plt.show()

    return output, merged_class_mask_dict

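# Example usage (a minimal sketch, not part of the original commit). The path
# "blueprint_rev_a.png" is a hypothetical placeholder for a large blueprint scan:
#
#   annotated_rgb, merged_masks = segment_large_image_with_tiles(
#       model,
#       "blueprint_rev_a.png",
#       tile_size=1080,
#       overlap=60,
#       display=False,
#   )
#   # merged_masks maps class IDs to the merged contours in full-image coordinates.
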
def usable_data(img_results, image_1):
    """
    Extract bounding boxes, centers, polygon areas, and a compass direction
    (relative to the image center) from the segmentation results for a single
    image. Returns a dictionary keyed by label, with each value a list of
    object data: { 'bbox', 'center', 'area', 'direction' }.
    """
    width, height = image_1.width, image_1.height
    image_data = {}
    for key in img_results.keys():
        image_data[key] = []
        for polygon in img_results[key]:
            polygon = np.array(polygon)

            # Handle varying polygon shapes:
            # if shape is (N, 1, 2), e.g. from cv2.findContours
            if polygon.ndim == 3 and polygon.shape[1] == 1 and polygon.shape[2] == 2:
                polygon = polygon.reshape(-1, 2)
            elif polygon.ndim == 2 and polygon.shape[1] == 1:
                polygon = np.squeeze(polygon, axis=1)

            # Now we expect polygon to be (N, 2)
            xs = polygon[:, 0]
            ys = polygon[:, 1]

            # Bounding box
            xmin, xmax = xs.min(), xs.max()
            ymin, ymax = ys.min(), ys.max()
            bbox = (xmin, ymin, xmax, ymax)

            # Center of the object
            centerX = (xmin + xmax) / 2.0
            centerY = (ymin + ymax) / 2.0

            # Image center
            x = width / 2
            y = height / 2

            # Direction of the object relative to the image center:
            # dx > 0 means the object lies east (right) of center,
            # dy > 0 means it lies north (up) of center, after inverting the
            # image y-axis (which grows downward).
            dx = centerX - x
            dy = y - centerY
            if dx > 0 and dy > 0:
                direction = "NE"
            elif dx > 0 and dy < 0:
                direction = "SE"
            elif dx < 0 and dy > 0:
                direction = "NW"
            elif dx < 0 and dy < 0:
                direction = "SW"
            elif dx == 0 and dy > 0:
                direction = "N"
            elif dx == 0 and dy < 0:
                direction = "S"
            elif dy == 0 and dx > 0:
                direction = "E"
            elif dy == 0 and dx < 0:
                direction = "W"
            else:
                direction = "Center"

            # Polygon area (Shoelace formula):
            # area = 0.5 * | x0*y1 + x1*y2 + ... + x_{n-1}*y0
            #               - (y0*x1 + y1*x2 + ... + y_{n-1}*x0) |
            area = 0.5 * np.abs(
                np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1))
            )

            image_data[key].append({
                'bbox': bbox,
                'center': (centerX, centerY),
                'area': area,
                'direction': direction
            })
    return image_data

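# Example usage (a minimal sketch, not part of the original commit). `usable_data`
# reads .width/.height, so the image is assumed to be a PIL.Image; `merged_masks`
# is the dictionary returned by segment_large_image_with_tiles above:
#
#   from PIL import Image
#   image_1 = Image.open("blueprint_rev_a.png")          # hypothetical path
#   image_1_data = usable_data(merged_masks, image_1)
#   # image_1_data[label_id] -> [{'bbox', 'center', 'area', 'direction'}, ...]
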
def plot_differences_on_image1(
    image1_path,
    mask_dict1,   # e.g., label_name -> list of contours for image1
    image2_path,
    mask_dict2,   # e.g., label_name -> list of contours for image2
    display=True
):
    """
    Compare two images (and their object masks). Plot all differences on Image 1 only:
      - Red: Objects that are missing on Image 1 (present in Image 2 but not Image 1).
      - Green: Objects that are missing on Image 2 (present in Image 1 but not Image 2).

    :param image1_path: Path to the first image
    :param mask_dict1: dict[label_name] = [contour1, contour2, ...] for the first image
    :param image2_path: Path to the second image
    :param mask_dict2: dict[label_name] = [contour1, contour2, ...] for the second image
    :param display: If True, shows the final overlay with matplotlib.
    :return: A tuple:
        - overlay1 (numpy array in RGB) with all differences highlighted
        - list_of_differences: Names of labels with differences
        - difference_masks: A dict with keys "missing_on_img1" and "missing_on_img2",
          where each key maps to a dict of label -> contours for the respective differences.
    """

    # Read both images
    img1_bgr = cv2.imread(image1_path)
    img2_bgr = cv2.imread(image2_path)
    if img1_bgr is None or img2_bgr is None:
        raise ValueError("Could not read one of the input images.")

    # Convert to RGB
    img1_rgb = cv2.cvtColor(img1_bgr, cv2.COLOR_BGR2RGB)
    img2_rgb = cv2.cvtColor(img2_bgr, cv2.COLOR_BGR2RGB)

    # Check matching dimensions
    H1, W1, _ = img1_rgb.shape
    H2, W2, _ = img2_rgb.shape
    if (H1 != H2) or (W1 != W2):
        raise ValueError("Images must be the same size to compare masks reliably.")

    # Prepare an overlay on top of Image 1
    overlay1 = img1_rgb.copy()

    # Take the union of all labels in both dictionaries
    all_labels = set(mask_dict1.keys()).union(set(mask_dict2.keys()))

    # Colors:
    RED = (255, 0, 0)    # (R, G, B)
    GREEN = (0, 255, 0)  # (R, G, B)

    # Track differences
    list_of_differences = []
    difference_masks = {
        "missing_on_img1": {},   # dict[label_name] = list of contours
        "missing_on_img2": {},   # dict[label_name] = list of contours
    }

    for label_id in all_labels:
        # Create binary masks for this label in each image
        mask1 = np.zeros((H1, W1), dtype=np.uint8)
        mask2 = np.zeros((H1, W1), dtype=np.uint8)

        # Fill polygons for label_id in Image 1
        if label_id in mask_dict1:
            for cnt in mask_dict1[label_id]:
                cv2.fillPoly(mask1, [cnt], 255)

        # Fill polygons for label_id in Image 2
        if label_id in mask_dict2:
            for cnt in mask_dict2[label_id]:
                cv2.fillPoly(mask2, [cnt], 255)

        # Missing on Image 1 (present in Image 2 but not in Image 1)
        # => mask2 AND (NOT mask1)
        missing_on_img1 = cv2.bitwise_and(mask2, cv2.bitwise_not(mask1))

        # Missing on Image 2 (present in Image 1 but not in Image 2)
        # => mask1 AND (NOT mask2)
        missing_on_img2 = cv2.bitwise_and(mask1, cv2.bitwise_not(mask2))

        # Extract contours of differences
        contours_missing_on_img1, _ = cv2.findContours(
            missing_on_img1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        contours_missing_on_img2, _ = cv2.findContours(
            missing_on_img2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        # Store contours in difference masks
        if contours_missing_on_img1:
            difference_masks["missing_on_img1"][label_id] = contours_missing_on_img1
        if contours_missing_on_img2:
            difference_masks["missing_on_img2"][label_id] = contours_missing_on_img2

        # If there are differences, track the label name
        if contours_missing_on_img1 or contours_missing_on_img2:
            list_of_differences.append(label_id)

        # Color them on the overlay of Image 1:
        for cnt in contours_missing_on_img1:
            cv2.drawContours(overlay1, [cnt], -1, RED, -1)     # highlight in red
        for cnt in contours_missing_on_img2:
            cv2.drawContours(overlay1, [cnt], -1, GREEN, -1)   # highlight in green

    # Display if required
    if display:
        plt.figure(figsize=(10, 8))
        plt.imshow(overlay1)
        plt.title("Differences on Image 1\n(Red: Missing on Image 1, Green: Missing on Image 2)")
        plt.axis("off")
        plt.show()

    return overlay1, list_of_differences, difference_masks

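# Example usage (a minimal sketch, not part of the original commit). The two
# paths are hypothetical blueprint revisions segmented with the tiling helper:
#
#   _, masks_rev_a = segment_large_image_with_tiles(model, "blueprint_rev_a.png", display=False)
#   _, masks_rev_b = segment_large_image_with_tiles(model, "blueprint_rev_b.png", display=False)
#   overlay, changed_labels, diff_masks = plot_differences_on_image1(
#       "blueprint_rev_a.png", masks_rev_a,
#       "blueprint_rev_b.png", masks_rev_b,
#       display=True,
#   )
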
def preprocess_image(image_path):
    """
    1) Load and prepare the image for further analysis.
    2) Convert to grayscale, optionally binarize or threshold.
    3) Return the processed image.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not load image from {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Optional: adaptive thresholding for clearer linework
    # thresholded = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    #                                     cv2.THRESH_BINARY, 11, 2)

    return gray

def detect_lines_and_grid(processed_image):
    """
    1) Detect major horizontal/vertical lines using Hough transform or morphological ops.
    2) Identify grid lines by analyzing line segment alignment.
    3) Return lines or grid intersections.
    """
    edges = cv2.Canny(processed_image, 50, 150, apertureSize=3)

    # Hough line detection for demonstration
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                            minLineLength=100, maxLineGap=10)
    # Here you would parse out vertical/horizontal lines, cluster them, etc.
    # (see the sketch after this function)

    return lines

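# Sketch (not part of the original commit) of the "parse out vertical/horizontal
# lines" step mentioned above. It classifies each Hough segment by its angle;
# the helper name and the 5-degree tolerance are assumptions, not repo values.
#
#   def split_horizontal_vertical(lines, angle_tol_deg=5):
#       horizontal, vertical = [], []
#       if lines is None:
#           return horizontal, vertical
#       for x1, y1, x2, y2 in lines.reshape(-1, 4):
#           angle = abs(np.degrees(np.arctan2(y2 - y1, x2 - x1)))
#           if angle <= angle_tol_deg or angle >= 180 - angle_tol_deg:
#               horizontal.append((x1, y1, x2, y2))
#           elif abs(angle - 90) <= angle_tol_deg:
#               vertical.append((x1, y1, x2, y2))
#       return horizontal, vertical
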
def run_ocr(processed_image, method='easyocr'):
    """
    1) Use an OCR engine to detect text (room labels, dimensions, etc.).
    2) 'method' can switch between Tesseract or EasyOCR.
    3) Return recognized text data (text content and bounding boxes).
    """
    text_data = []

    if method == 'easyocr':
        reader = easyocr.Reader(['en', 'ko'], gpu=False)
        result = reader.readtext(processed_image, detail=1, paragraph=False)
        # result structure: [ [bbox, text, confidence], ... ]
        for (bbox, text, conf) in result:
            text_data.append({'bbox': bbox, 'text': text, 'confidence': conf})
    else:
        # Tesseract approach
        config = r'--psm 6'
        tess_result = pytesseract.image_to_data(
            processed_image, config=config, output_type=pytesseract.Output.DICT
        )
        # Parse data into a structured list
        for i in range(len(tess_result['text'])):
            txt = tess_result['text'][i].strip()
            if txt:
                x = tess_result['left'][i]
                y = tess_result['top'][i]
                w = tess_result['width'][i]
                h = tess_result['height'][i]
                conf = tess_result['conf'][i]
                text_data.append({
                    'bbox': (x, y, x + w, y + h),
                    'text': txt,
                    'confidence': conf
                })
    return text_data

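# Example usage (a minimal sketch, not part of the original commit). The path is
# a hypothetical placeholder:
#
#   gray = preprocess_image("blueprint_rev_a.png")
#   words = run_ocr(gray, method='easyocr')
#   for item in words:
#       print(item['text'], item['confidence'])
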
def detect_symbols_and_rooms(processed_image):
    """
    1) Potentially run object detection (e.g., YOLO, Detectron2) to detect symbols:
       - Doors, balconies, fixtures, etc.
    2) Segment out rooms by combining wall detection + adjacency.
    3) Return data about room polygons, symbols, etc.
    """
    # Placeholder: real implementation would require a trained model or rule-based approach.
    # For demonstration, return empty data.
    rooms_data = []
    symbols_data = []
    return rooms_data, symbols_data

def blueprint_analyzer(image_path):
    """
    Orchestrate the pipeline on one image:
      1) Preprocess
      2) Detect structural lines
      3) OCR text detection
    Symbol/room detection and difference summaries are handled separately
    (see detect_symbols_and_rooms and plot_differences_on_image1) and are not
    called here yet.
    """
    processed_img = preprocess_image(image_path)

    lines = detect_lines_and_grid(processed_img)
    text_data = run_ocr(processed_img, method='easyocr')

    return lines, text_data

system_prompt = """You are given two construction blueprint images along with their segmentation data.

Do not present any numeric bounding box or area values in your final answer.
Instead, produce a concise, high-level descriptive summary of the differences, using relative location references or known blueprint areas (e.g., “balcony,” “bathroom,” “central hallway,” etc.).
Treat two objects as identical (and thus ignore them) if:
- They have the same label/class, and
- Their center coordinates are very close.
If possible, provide an OCR-based overview of changed text or lines in those areas. For example, mention if the balcony area contains new textual annotations or if certain labels have been removed/added.
Output the result in brief, correct Markdown summarizing only the differences between the images (e.g., newly added structures, missing items, changed labeling or text).
Remember: no numeric bounding box or area data should be included in the final response. Use location references (“in the top-right corner,” “in the balcony,” etc.) and class names to describe changes.
"""

system_prompt_2 = """You are analyzing two construction blueprint images (Image 1 and Image 2). Each image has a set of detected objects, including “areas” like Balconies, Rooms, Hallways, etc., and smaller objects like Doors, Walls, or Stairs.

Key points:

- An object is considered to belong to an area if the object's center lies within or very close to that area’s bounding box.
- Two objects in different images are considered the same object if:
  - They share the same label/class, and
  - Their centers are very close in coordinates.
  In such a case, ignore them (do not list them) because they have not changed significantly.
- Focus only on describing the differences between Image 1 and Image 2, such as:
  - New objects or areas that appear in Image 2 but not in Image 1 (and vice versa).
  - Changes in labeling or text (e.g., from an OCR perspective).
  - Changes in object location or area assignment.
- Do NOT output numeric bounding boxes, polygon areas, or center coordinates in your final explanation. Instead, provide a relative or area-based description (e.g., “The door is now located in the balcony,” “There are two new doors in the living room,” “A new label has been added near the main hallway,” etc.).
- Produce a concise and correct Markdown summary that highlights only significant differences.
"""

system_prompt_3 = """You are analyzing two construction blueprint images (Image 1 and Image 2). For each image, you have:

- A set of objects (walls, doors, stairs, etc.) along with information on their labels and centers.
- A set of “areas” (e.g., “Balcony,” “Living Room,” “Hallway,” “Bathroom,” etc.) with bounding boxes to identify where each area is located.

Task requirements:

1. Identify differences between Image 1 and Image 2:
   - Newly added objects in Image 2 that were not in Image 1.
   - Missing objects in Image 2 that were in Image 1.
   - Objects that have changed location or have changed labels.
   - Text or label changes, if available.
2. For missing or newly added objects, describe their location in terms of relative position or known areas (not raw coordinates):
   For example, say “the missing doors were originally near the top-left corner, adjacent to the main hallway,” or “new walls have been added in the southeast corner, near the living room.”
3. Avoid including numeric bounding boxes, polygon areas, or centers in the final explanation.
4. If two objects (one in Image 1 and one in Image 2) have the same label and nearly identical centers, consider them the same object and do not report them as a difference.
5. Whenever possible, use known area labels to describe positions (e.g., “within the dining area,” “just north of the bathroom,” “adjacent to the balcony,” etc.).
6. Return a concise and correct Markdown summary with these differences, focusing on where changes occur.
"""

system_prompt_4 = """You are given two sets of data from two blueprint images (Image 1 and Image 2). Along with each image’s extracted objects, you have:

- A set of objects (walls, doors, stairs, etc.) along with information on their labels and centers.
- A set of “areas” (e.g., “Balcony,” “Living Room,” “Hallway,” “Bathroom,” etc.) with bounding boxes to identify where each area is located.
- A “nearest reference area” for each object, including a small textual description of distance and direction (e.g., “Door #2 is near the Balcony to the east”).
- Identification of which objects match across the two images (same label and close centers).

Your task:

1. Ignore any objects that match between the two images (same label, nearly identical location).
2. Summarize the differences: newly added or missing objects, label changes, and any changes in object location.
3. Use the relative position data (distance/direction text) to describe where each new or missing object is/was in terms of known areas (e.g., “the missing wall in the northern side of the corridor,” “the new door near the balcony,” etc.).
4. Do not output raw numeric distances, bounding boxes, or polygon areas in your final summary. Instead, give a natural-language location description (e.g., “near the east side of the main hallway,” “slightly south of the balcony,” etc.).
5. Provide your answer in a concise Markdown format, focusing only on significant differences."""

# user_prompt = f"""I have two construction blueprint images, Image 1 and Image 2, and here are their segmentation results (with bounding boxes, centers, and areas). Please compare them and provide a short Markdown summary of the differences, ignoring any objects that match in both images:

# Image 1:
# image: {image_1}

# json
# Copy
# {image_1_data}
# Image 2:
# image: {image_2}
# json
# Copy
# {image_2_data}

# Please:

# Compare the two images in terms of architectural/structural changes.
# Ignore objects that appear in both images (same label & near-identical centers).
# Refer to changes in relative location or in known blueprint areas (e.g. “balcony,” “living room,” “main hallway”), not numeric bounding boxes or polygon areas.
# Include mentions of new text or lines if any appear based on an OCR-like analysis.
# Output only the differences in a concise Markdown summary."""

# user_prompt_2 = f"""I have two construction blueprint images, Image 1 and Image 2, and here are their segmentation results (with bounding boxes, centers, and areas). Please compare them and provide a short Markdown summary of the differences, ignoring any objects that match in both images:

# Image 1:
# image: {image_1}

# json
# Copy
# {image_1_data}
# Image 2:
# image: {image_2}
# json
# Copy
# {image_2_data}

# Please:

# Ignore objects that appear in both images with matching labels and nearly identical centers.
# Use the bounding boxes of recognized “areas” (like “Balcony,” “Living Room,” “Bathroom,” etc.) to determine which area new or changed objects belong to. For instance, if a door’s center is inside or very close to the balcony’s bounding box, treat that door as being “in the balcony.”
# Do not display any raw bounding box coordinates, center points, or numeric area values in your final response.
# Summarize only the differences (e.g., newly added objects, missing objects, changed textual labels) in a brief Markdown format.
# Mention if there are text/label changes (e.g., from an OCR perspective) in any particular area or region"""

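# Sketch (not part of the original commit) of how the commented-out prompts above
# could be filled in. The file names and variable names are hypothetical
# placeholders; the data comes from the helpers defined earlier in this module:
#
#   from PIL import Image
#
#   image_1 = Image.open("blueprint_rev_a.png")
#   image_2 = Image.open("blueprint_rev_b.png")
#   _, masks_1 = segment_large_image_with_tiles(model, "blueprint_rev_a.png", display=False)
#   _, masks_2 = segment_large_image_with_tiles(model, "blueprint_rev_b.png", display=False)
#   image_1_data = usable_data(masks_1, image_1)
#   image_2_data = usable_data(masks_2, image_2)
#   # These are the values the f-string prompts above would interpolate, paired
#   # with one of the system prompts, when querying a multimodal LLM.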