Spaces:

Norakneath
/

TestingYolo

Sleeping

App Files Files Community

Norakneath committed on Feb 12

Commit

818d306

verified ·

1 Parent(s): 50437ea

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -28

app.py CHANGED Viewed

@@ -1,18 +1,17 @@
 import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
-import numpy as np
 # Load YOLO model
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")  # Force CPU usage
-def merge_boxes_into_lines(boxes, y_threshold=20):
     """
-    Merge bounding boxes that are close together in the y-axis (same line).
     Args:
         boxes: List of bounding boxes [x1, y1, x2, y2]
-        y_threshold: Max distance between words to consider as the same line
     Returns:
         List of merged line bounding boxes
     """
@@ -28,53 +27,53 @@ def merge_boxes_into_lines(boxes, y_threshold=20):
     for i in range(1, len(boxes)):
         x1, y1, x2, y2 = boxes[i]
-        # Merge boxes that are close in the y-axis
         if abs(y1 - current_line[1]) < y_threshold:
             current_line[0] = min(current_line[0], x1)  # Expand left boundary
             current_line[2] = max(current_line[2], x2)  # Expand right boundary
             current_line[3] = max(current_line[3], y2)  # Expand bottom boundary
         else:
             merged_lines.append(current_line)
             current_line = list(boxes[i])
     merged_lines.append(current_line)
     return merged_lines
-def detect_lines(image, resize=False, target_size=(640, 640)):
     """
-    Detects text lines using YOLO and merges bounding boxes.
     Args:
         image: Input image (PIL format)
-        resize: Boolean, whether to resize image before detection
-        target_size: Tuple (width, height) for resizing
     Returns:
-        Image with bounding boxes drawn, Number of detected boxes
     """
     image = Image.fromarray(image)  # Convert NumPy array to PIL Image
     original_image = image.copy()  # Keep a copy of the original image
-    if resize:
-        image = image.resize(target_size, Image.LANCZOS)
-    # Run YOLO detection
     results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
     detected_boxes = results[0].boxes.xyxy.tolist()
     detected_boxes = [list(map(int, box)) for box in detected_boxes]  # Convert to integer
-    # Merge bounding boxes into full text lines
     merged_boxes = merge_boxes_into_lines(detected_boxes)
     # Draw bounding boxes
-    image_with_boxes = original_image if not resize else image.copy()
-    draw = ImageDraw.Draw(image_with_boxes)
     for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
         draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
         draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
-    return image_with_boxes, len(merged_boxes)
-# Define Gradio interface with two options: Original & Resized detection
 with gr.Blocks() as iface:
     gr.Markdown("# Text Line Detection")
     gr.Markdown("## Input your custom image for text line detection")
@@ -85,19 +84,17 @@ with gr.Blocks() as iface:
             image_input = gr.Image(type="numpy", label="Upload an image")
         with gr.Column(scale=1):
-            gr.Markdown("### YOLO on Original Image")
-            output_original = gr.Image(type="pil", label="Detected Lines (Original Size)")
-            count_original = gr.Textbox(label="Number of Detected Lines (Original Size)")
-        with gr.Column(scale=1):
-            gr.Markdown("### YOLO on Resized Image (640x640)")
-            output_resized = gr.Image(type="pil", label="Detected Lines (Resized to 640x640)")
-            count_resized = gr.Textbox(label="Number of Detected Lines (Resized)")
     image_input.upload(
-        lambda img: (*detect_lines(img, resize=False), *detect_lines(img, resize=True, target_size=(640, 640))),
         inputs=image_input,
-        outputs=[output_original, count_original, output_resized, count_resized]
     )
 # Launch Gradio interface

 import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
 # Load YOLO model
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")  # Force CPU usage
+def merge_boxes_into_lines(boxes, y_threshold=10):
     """
+    Merge bounding boxes that lie on the same row, without merging boxes from different rows.
     Args:
         boxes: List of bounding boxes [x1, y1, x2, y2]
+        y_threshold: Max difference in y1 position to be considered the same row
     Returns:
         List of merged line bounding boxes
     """
     for i in range(1, len(boxes)):
         x1, y1, x2, y2 = boxes[i]
+        # Merge only if y position is very close (same row)
         if abs(y1 - current_line[1]) < y_threshold:
             current_line[0] = min(current_line[0], x1)  # Expand left boundary
             current_line[2] = max(current_line[2], x2)  # Expand right boundary
             current_line[3] = max(current_line[3], y2)  # Expand bottom boundary
         else:
+            # Store previous line and start a new one
             merged_lines.append(current_line)
             current_line = list(boxes[i])
     merged_lines.append(current_line)
     return merged_lines
+def detect_and_crop_lines(image):
     """
+    Detects text lines using YOLO, merges them, and crops each line.
     Args:
         image: Input image (PIL format)
     Returns:
+        Annotated image with bounding boxes, List of cropped images
     """
     image = Image.fromarray(image)  # Convert NumPy array to PIL Image
     original_image = image.copy()  # Keep a copy of the original image
+    # Run YOLO detection on the original image
     results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
     detected_boxes = results[0].boxes.xyxy.tolist()
     detected_boxes = [list(map(int, box)) for box in detected_boxes]  # Convert to integer
+    # Merge bounding boxes based on row position
     merged_boxes = merge_boxes_into_lines(detected_boxes)
     # Draw bounding boxes
+    draw = ImageDraw.Draw(original_image)
+    cropped_lines = []
     for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
         draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
         draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
+        # Crop the detected text line
+        cropped_line = image.crop((x1, y1, x2, y2))
+        cropped_lines.append(cropped_line)
+    return original_image, cropped_lines
+# Define Gradio interface
 with gr.Blocks() as iface:
     gr.Markdown("# Text Line Detection")
     gr.Markdown("## Input your custom image for text line detection")
             image_input = gr.Image(type="numpy", label="Upload an image")
         with gr.Column(scale=1):
+            gr.Markdown("### Annotated Image with Detected Lines")
+            output_annotated = gr.Image(type="pil", label="Detected Text Lines")
+    gr.Markdown("### Cropped Text Lines (Each Line Detected Separately)")
+    cropped_gallery = gr.Gallery(label="Cropped Lines Gallery", columns=3, preview=True)
     image_input.upload(
+        lambda img: detect_and_crop_lines(img),
         inputs=image_input,
+        outputs=[output_annotated, cropped_gallery]
     )
 # Launch Gradio interface