Norakneath committed on
Commit
818d306
·
verified ·
1 Parent(s): 50437ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -28
app.py CHANGED
@@ -1,18 +1,17 @@
1
  import gradio as gr
2
  from ultralytics import YOLO
3
  from PIL import Image, ImageDraw
4
- import numpy as np
5
 
6
  # Load YOLO model
7
  YOLO_MODEL_PATH = "best.pt"
8
  model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu") # Force CPU usage
9
 
10
- def merge_boxes_into_lines(boxes, y_threshold=20):
11
  """
12
- Merge bounding boxes that are close together in the y-axis (same line).
13
  Args:
14
  boxes: List of bounding boxes [x1, y1, x2, y2]
15
- y_threshold: Max distance between words to consider as the same line
16
  Returns:
17
  List of merged line bounding boxes
18
  """
@@ -28,53 +27,53 @@ def merge_boxes_into_lines(boxes, y_threshold=20):
28
  for i in range(1, len(boxes)):
29
  x1, y1, x2, y2 = boxes[i]
30
 
31
- # Merge boxes that are close in the y-axis
32
  if abs(y1 - current_line[1]) < y_threshold:
33
  current_line[0] = min(current_line[0], x1) # Expand left boundary
34
  current_line[2] = max(current_line[2], x2) # Expand right boundary
35
  current_line[3] = max(current_line[3], y2) # Expand bottom boundary
36
  else:
 
37
  merged_lines.append(current_line)
38
  current_line = list(boxes[i])
39
 
40
  merged_lines.append(current_line)
41
  return merged_lines
42
 
43
def detect_lines(image, resize=False, target_size=(640, 640)):
    """
    Detect text lines with YOLO and draw the merged line boxes on the image.

    Args:
        image: Input image as a NumPy array (the Gradio input component is
            declared with type="numpy"); it is converted to a PIL image here.
        resize: Boolean, whether to resize the image before detection.
        target_size: Tuple (width, height) used when ``resize`` is true.
    Returns:
        Tuple of (PIL image with bounding boxes drawn, number of merged lines).
        When ``resize`` is true the boxes are drawn on the resized image,
        since the detections are expressed in that image's coordinates.
    """
    image = Image.fromarray(image)  # Convert NumPy array to PIL Image

    if resize:
        image = image.resize(target_size, Image.LANCZOS)

    # Run YOLO detection
    results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = [
        list(map(int, box))  # Convert float coordinates to integers
        for box in results[0].boxes.xyxy.tolist()
    ]

    # Merge bounding boxes into full text lines. With no detections there is
    # nothing to merge, so the helper is not called on an empty list.
    merged_boxes = merge_boxes_into_lines(detected_boxes) if detected_boxes else []

    # Draw on a copy of the image the detector actually saw, so the box
    # coordinates and the canvas always share the same coordinate space.
    image_with_boxes = image.copy()
    draw = ImageDraw.Draw(image_with_boxes)

    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        # Keep the label on the canvas for boxes within 10 px of the top edge.
        draw.text((x1, max(y1 - 10, 0)), f"Line {idx}", fill="blue")

    return image_with_boxes, len(merged_boxes)
 
 
 
 
76
 
77
- # Define Gradio interface with two options: Original & Resized detection
78
  with gr.Blocks() as iface:
79
  gr.Markdown("# Text Line Detection")
80
  gr.Markdown("## Input your custom image for text line detection")
@@ -85,19 +84,17 @@ with gr.Blocks() as iface:
85
  image_input = gr.Image(type="numpy", label="Upload an image")
86
 
87
  with gr.Column(scale=1):
88
- gr.Markdown("### YOLO on Original Image")
89
- output_original = gr.Image(type="pil", label="Detected Lines (Original Size)")
90
- count_original = gr.Textbox(label="Number of Detected Lines (Original Size)")
91
 
92
- with gr.Column(scale=1):
93
- gr.Markdown("### YOLO on Resized Image (640x640)")
94
- output_resized = gr.Image(type="pil", label="Detected Lines (Resized to 640x640)")
95
- count_resized = gr.Textbox(label="Number of Detected Lines (Resized)")
96
 
97
  image_input.upload(
98
- lambda img: (*detect_lines(img, resize=False), *detect_lines(img, resize=True, target_size=(640, 640))),
99
  inputs=image_input,
100
- outputs=[output_original, count_original, output_resized, count_resized]
101
  )
102
 
103
  # Launch Gradio interface
 
1
  import gradio as gr
2
  from ultralytics import YOLO
3
  from PIL import Image, ImageDraw
 
4
 
5
  # Load YOLO model
6
  YOLO_MODEL_PATH = "best.pt"
7
  model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu") # Force CPU usage
8
 
9
+ def merge_boxes_into_lines(boxes, y_threshold=10):
10
  """
11
+ Merge bounding boxes that are on the same row but not merge different row lines.
12
  Args:
13
  boxes: List of bounding boxes [x1, y1, x2, y2]
14
+ y_threshold: Max difference in y1 position to be considered the same row
15
  Returns:
16
  List of merged line bounding boxes
17
  """
 
27
  for i in range(1, len(boxes)):
28
  x1, y1, x2, y2 = boxes[i]
29
 
30
+ # Merge only if y position is very close (same row)
31
  if abs(y1 - current_line[1]) < y_threshold:
32
  current_line[0] = min(current_line[0], x1) # Expand left boundary
33
  current_line[2] = max(current_line[2], x2) # Expand right boundary
34
  current_line[3] = max(current_line[3], y2) # Expand bottom boundary
35
  else:
36
+ # Store previous line and start a new one
37
  merged_lines.append(current_line)
38
  current_line = list(boxes[i])
39
 
40
  merged_lines.append(current_line)
41
  return merged_lines
42
 
43
def detect_and_crop_lines(image):
    """
    Detect text lines using YOLO, merge them, and crop each line.

    Args:
        image: Input image as a NumPy array (the Gradio input component is
            declared with type="numpy"); it is converted to a PIL image here.
    Returns:
        Tuple of (annotated PIL image with bounding boxes drawn,
        list of PIL images, one crop per merged line with a non-empty area).
    """
    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
    annotated = image.copy()  # Draw on a copy so the crops stay unmarked

    # Run YOLO detection on the original image
    results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = [
        list(map(int, box))  # Convert float coordinates to integers
        for box in results[0].boxes.xyxy.tolist()
    ]

    # Merge bounding boxes based on row position. With no detections there is
    # nothing to merge, so the helper is not called on an empty list.
    merged_boxes = merge_boxes_into_lines(detected_boxes) if detected_boxes else []

    draw = ImageDraw.Draw(annotated)
    width, height = image.size
    cropped_lines = []

    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        # Keep the label on the canvas for boxes within 10 px of the top edge.
        draw.text((x1, max(y1 - 10, 0)), f"Line {idx}", fill="blue")

        # Clamp the crop window to the image so the crop carries no padding
        # from outside the picture.
        left, top = max(x1, 0), max(y1, 0)
        right, bottom = min(x2, width), min(y2, height)
        if right <= left or bottom <= top:
            # Integer truncation can collapse a thin box to zero area; an
            # empty crop has nothing to show. NOTE: gallery positions may then
            # differ from the "Line N" labels on the annotated image.
            continue

        cropped_lines.append(image.crop((left, top, right, bottom)))

    return annotated, cropped_lines
75
 
76
+ # Define Gradio interface
77
  with gr.Blocks() as iface:
78
  gr.Markdown("# Text Line Detection")
79
  gr.Markdown("## Input your custom image for text line detection")
 
84
  image_input = gr.Image(type="numpy", label="Upload an image")
85
 
86
  with gr.Column(scale=1):
87
+ gr.Markdown("### Annotated Image with Detected Lines")
88
+ output_annotated = gr.Image(type="pil", label="Detected Text Lines")
 
89
 
90
+ gr.Markdown("### Cropped Text Lines (Each Line Detected Separately)")
91
+
92
+ cropped_gallery = gr.Gallery(label="Cropped Lines Gallery", columns=3, preview=True)
 
93
 
94
  image_input.upload(
95
+ lambda img: detect_and_crop_lines(img),
96
  inputs=image_input,
97
+ outputs=[output_annotated, cropped_gallery]
98
  )
99
 
100
  # Launch Gradio interface