Spaces:

Norakneath
/

TestingYolo

Sleeping

App Files Files Community

Norakneath committed on Feb 20

Commit

8e8bfd2

verified ·

1 Parent(s): dcfa431

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -27

app.py CHANGED Viewed

@@ -108,10 +108,24 @@
 import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
-import numpy as np
-import io
-# Load YOLO model
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
@@ -127,10 +141,10 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
     for i in range(1, len(boxes)):
         x1, y1, x2, y2 = boxes[i]
-        if abs(y1 - current_line[1]) < y_threshold:
-            current_line[0] = min(current_line[0], x1)
-            current_line[2] = max(current_line[2], x2)
-            current_line[3] = max(current_line[3], y2)
         else:
             merged_lines.append(current_line)
             current_line = list(boxes[i])
@@ -138,46 +152,66 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
     merged_lines.append(current_line)
     return merged_lines
-def detect_text_lines(image):
-    """Detects text lines and returns the image with bounding boxes."""
     image = Image.fromarray(image)
     original_image = image.copy()
     results = model.predict(image, conf=0.1, iou=0.2, device="cpu")
     detected_boxes = results[0].boxes.xyxy.tolist()
     detected_boxes = [list(map(int, box)) for box in detected_boxes]
     merged_boxes = merge_boxes_into_lines(detected_boxes)
     draw = ImageDraw.Draw(original_image)
     for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
         draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
         draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
-    return original_image
-# Gradio UI for testing
 with gr.Blocks() as iface:
-    gr.Markdown("# 📜 Text Line Detection with Bounding Boxes")
-    image_input = gr.Image(type="numpy", label="Upload an image")
-    output_image = gr.Image(type="pil", label="Detected Text Lines")
     image_input.upload(
-        detect_text_lines,
         inputs=image_input,
-        outputs=output_image
     )
-### **Expose API for Telegram Bot**
-api_interface = gr.Interface(
-    fn=detect_text_lines,  # API function that returns the processed image
-    inputs=gr.Image(type="numpy"),
-    outputs="image"
-)
-# 🚀 Launch UI and API
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
-    api_interface.launch(server_name="0.0.0.0", server_port=7861, share=True)

 import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
+import pytesseract
+import subprocess
+# Point pytesseract at a fixed Tesseract binary location. This only sets the
+# path; whether the binary can actually be run is checked by check_tesseract().
+TESSERACT_PATH = "/usr/bin/tesseract"
+pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
+def check_tesseract():
+    """Report whether the Tesseract binary at TESSERACT_PATH can be executed.
+
+    Runs ``<TESSERACT_PATH> --version`` and prints either the first line of
+    its output or the reason the command failed.
+
+    Returns:
+        True if the command ran and exited successfully, False otherwise.
+    """
+    try:
+        # Bounded wait so a wedged binary cannot stall the caller indefinitely.
+        raw_output = subprocess.check_output([TESSERACT_PATH, "--version"], timeout=10)
+    except (OSError, subprocess.SubprocessError) as e:
+        # Missing or non-executable binary, non-zero exit status, or timeout.
+        print(f"Tesseract not found: {e}")
+        return False
+    # Keep only the first output line; undecodable bytes are replaced rather
+    # than raising, so a successful run is never reported as a failure.
+    tesseract_version = raw_output.decode("utf-8", errors="replace").split("\n")[0]
+    print(f"Tesseract Version: {tesseract_version}")
+    return True
+# Load the YOLO detection model from best.pt (the path is relative to the
+# working directory, so the file must exist there) and move it to the CPU
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
     for i in range(1, len(boxes)):
         x1, y1, x2, y2 = boxes[i]
+        if abs(y1 - current_line[1]) < y_threshold:  # Close enough to the previous line
+            current_line[0] = min(current_line[0], x1)  # Extend left boundary
+            current_line[2] = max(current_line[2], x2)  # Extend right boundary
+            current_line[3] = max(current_line[3], y2)  # Extend bottom boundary
         else:
             merged_lines.append(current_line)
             current_line = list(boxes[i])
     merged_lines.append(current_line)
     return merged_lines
+def detect_and_ocr(image):
+    """Detect text lines, draw their bounding boxes, and OCR the whole image.
+
+    Args:
+        image: Image data accepted by ``PIL.Image.fromarray`` (the Gradio
+            input component is declared with ``type="numpy"``).
+
+    Returns:
+        A ``(annotated_image, text)`` tuple. ``annotated_image`` is a PIL copy
+        of the input with a blue rectangle and a "Line N" label per merged
+        box. ``text`` is the Khmer OCR result for the un-annotated image, or
+        a warning message when OCR is unavailable, fails, or finds no text.
+    """
     image = Image.fromarray(image)
     original_image = image.copy()
+    # Run YOLO text detection
     results = model.predict(image, conf=0.1, iou=0.2, device="cpu")
     detected_boxes = results[0].boxes.xyxy.tolist()
     detected_boxes = [list(map(int, box)) for box in detected_boxes]
+    # Merge detected boxes into text lines
     merged_boxes = merge_boxes_into_lines(detected_boxes)
+    # Draw on the copy only, so the OCR step below still sees the clean image
     draw = ImageDraw.Draw(original_image)
     for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
         draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
         draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
+    if not check_tesseract():
+        return original_image, "⚠️ OCR not available."
+    try:
+        # OCR covers the entire un-annotated image, not the individual boxes
+        ocr_text = pytesseract.image_to_string(image, lang="khm")
+    except Exception as e:
+        # Deliberately broad: an OCR failure must not lose the annotated image
+        print(f"OCR failed: {e}")
+        return original_image, "⚠️ OCR not available."
+    # Whitespace-only output (e.g. a blank page) counts as "nothing recognised"
+    # and is reported as such instead of being shown as an empty result.
+    ocr_text = ocr_text.strip()
+    return original_image, ocr_text if ocr_text else "⚠️ No text detected."
+# Gradio UI: upload widget and annotated result side by side in one row,
+# with the OCR text box underneath
 with gr.Blocks() as iface:
+    gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
+    gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### 📤 Upload Image")
+            image_input = gr.Image(type="numpy", label="Upload an image")
+        with gr.Column(scale=1):
+            gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
+            output_annotated = gr.Image(type="pil", label="Detected Text Lines")
+    gr.Markdown("### 📝 Extracted Text (OCR Result)")
+    output_text = gr.Textbox(label="Extracted Text", lines=10)
+    # On upload, run detect_and_ocr on the image; its two return values fill
+    # the annotated-image component and the text box, in that order
     image_input.upload(
+        detect_and_ocr,
         inputs=image_input,
+        outputs=[output_annotated, output_text]
     )
+# 🚀 Start the server only when run as a script, listening on all interfaces
+# (0.0.0.0) at port 7860 (settings intended for Hugging Face Spaces)
 if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)