Spaces:

Norakneath
/

TestingYolo

Sleeping

App Files Files Community

Norakneath committed on Feb 20

Commit

78713c7

verified ·

1 Parent(s): 4848287

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -57

app.py CHANGED Viewed

@@ -1,24 +1,117 @@
 import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
-import pytesseract
-import subprocess
-# Ensure Tesseract OCR is installed and detected
-TESSERACT_PATH = "/usr/bin/tesseract"
-pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
-def check_tesseract():
-    """Check if Tesseract is installed and print its version."""
-    try:
-        tesseract_version = subprocess.check_output([TESSERACT_PATH, "--version"]).decode("utf-8").split("\n")[0]
-        print(f"Tesseract Version: {tesseract_version}")
-        return True
-    except Exception as e:
-        print(f"Tesseract not found: {e}")
-        return False
-# Load YOLO model (ensure best.pt exists in the working directory)
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
@@ -34,10 +127,10 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
     for i in range(1, len(boxes)):
         x1, y1, x2, y2 = boxes[i]
-        if abs(y1 - current_line[1]) < y_threshold:  # Close enough to the previous line
-            current_line[0] = min(current_line[0], x1)  # Extend left boundary
-            current_line[2] = max(current_line[2], x2)  # Extend right boundary
-            current_line[3] = max(current_line[3], y2)  # Extend bottom boundary
         else:
             merged_lines.append(current_line)
             current_line = list(boxes[i])
@@ -45,8 +138,8 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
     merged_lines.append(current_line)
     return merged_lines
-def detect_and_ocr(image):
-    """Detects text lines, draws bounding boxes, and runs OCR if available."""
     image = Image.fromarray(image)
     original_image = image.copy()
@@ -57,49 +150,34 @@ def detect_and_ocr(image):
     merged_boxes = merge_boxes_into_lines(detected_boxes)
     draw = ImageDraw.Draw(original_image)
-    extracted_text_lines = []
     for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
         draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
         draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
-        cropped_line = image.crop((x1, y1, x2, y2))
-        if check_tesseract():  # If Tesseract is installed, run OCR
-            try:
-                ocr_text = pytesseract.image_to_string(cropped_line, lang="khm".strip())
-                if ocr_text:
-                    extracted_text_lines.append(ocr_text)
-            except Exception as e:
-                print(f"OCR failed for line {idx}: {e}")
-    full_text = "\n".join(extracted_text_lines) if extracted_text_lines else "⚠️ OCR not available. Showing detected lines only."
-    return original_image, full_text
-# Gradio UI
 with gr.Blocks() as iface:
-    gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
-    gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### 📤 Upload Image")
-            image_input = gr.Image(type="numpy", label="Upload an image")
-        with gr.Column(scale=1):
-            gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
-            output_annotated = gr.Image(type="pil", label="Detected Text Lines")
-    gr.Markdown("### 📝 Extracted Text (OCR Result)")
-    output_text = gr.Textbox(label="Extracted Text", lines=10)
     image_input.upload(
-        detect_and_ocr,
         inputs=image_input,
-        outputs=[output_annotated, output_text]
     )
-# 🚀 Ensure the app runs properly in Hugging Face Spaces
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860)

+# import gradio as gr
+# from ultralytics import YOLO
+# from PIL import Image, ImageDraw
+# import pytesseract
+# import subprocess
+# # Ensure Tesseract OCR is installed and detected
+# TESSERACT_PATH = "/usr/bin/tesseract"
+# pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
+# def check_tesseract():
+#     """Check if Tesseract is installed and print its version."""
+#     try:
+#         tesseract_version = subprocess.check_output([TESSERACT_PATH, "--version"]).decode("utf-8").split("\n")[0]
+#         print(f"Tesseract Version: {tesseract_version}")
+#         return True
+#     except Exception as e:
+#         print(f"Tesseract not found: {e}")
+#         return False
+# # Load YOLO model (ensure best.pt exists in the working directory)
+# YOLO_MODEL_PATH = "best.pt"
+# model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
+# def merge_boxes_into_lines(boxes, y_threshold=10):
+#     """Merge bounding boxes if they belong to the same text row."""
+#     if len(boxes) == 0:
+#         return []
+#     boxes = sorted(boxes, key=lambda b: b[1])  # Sort by y-axis (top position)
+#     merged_lines = []
+#     current_line = list(boxes[0])
+#     for i in range(1, len(boxes)):
+#         x1, y1, x2, y2 = boxes[i]
+#         if abs(y1 - current_line[1]) < y_threshold:  # Close enough to the previous line
+#             current_line[0] = min(current_line[0], x1)  # Extend left boundary
+#             current_line[2] = max(current_line[2], x2)  # Extend right boundary
+#             current_line[3] = max(current_line[3], y2)  # Extend bottom boundary
+#         else:
+#             merged_lines.append(current_line)
+#             current_line = list(boxes[i])
+#     merged_lines.append(current_line)
+#     return merged_lines
+# def detect_and_ocr(image):
+#     """Detects text lines, draws bounding boxes, and runs OCR if available."""
+#     image = Image.fromarray(image)
+#     original_image = image.copy()
+#     results = model.predict(image, conf=0.1, iou=0.2, device="cpu")
+#     detected_boxes = results[0].boxes.xyxy.tolist()
+#     detected_boxes = [list(map(int, box)) for box in detected_boxes]
+#     merged_boxes = merge_boxes_into_lines(detected_boxes)
+#     draw = ImageDraw.Draw(original_image)
+#     extracted_text_lines = []
+#     for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
+#         draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
+#         draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
+#         cropped_line = image.crop((x1, y1, x2, y2))
+#         if check_tesseract():  # If Tesseract is installed, run OCR
+#             try:
+#                 ocr_text = pytesseract.image_to_string(cropped_line, lang="khm".strip())
+#                 if ocr_text:
+#                     extracted_text_lines.append(ocr_text)
+#             except Exception as e:
+#                 print(f"OCR failed for line {idx}: {e}")
+#     full_text = "\n".join(extracted_text_lines) if extracted_text_lines else "⚠️ OCR not available. Showing detected lines only."
+#     return original_image, full_text
+# # Gradio UI
+# with gr.Blocks() as iface:
+#     gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
+#     gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")
+#     with gr.Row():
+#         with gr.Column(scale=1):
+#             gr.Markdown("### 📤 Upload Image")
+#             image_input = gr.Image(type="numpy", label="Upload an image")
+#         with gr.Column(scale=1):
+#             gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
+#             output_annotated = gr.Image(type="pil", label="Detected Text Lines")
+#     gr.Markdown("### 📝 Extracted Text (OCR Result)")
+#     output_text = gr.Textbox(label="Extracted Text", lines=10)
+#     image_input.upload(
+#         detect_and_ocr,
+#         inputs=image_input,
+#         outputs=[output_annotated, output_text]
+#     )
+# # 🚀 Ensure the app runs properly in Hugging Face Spaces
+# if __name__ == "__main__":
+#     iface.launch(server_name="0.0.0.0", server_port=7860)
 import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
+import numpy as np
+import io
+# Load YOLO model
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
     for i in range(1, len(boxes)):
         x1, y1, x2, y2 = boxes[i]
+        if abs(y1 - current_line[1]) < y_threshold:
+            current_line[0] = min(current_line[0], x1)
+            current_line[2] = max(current_line[2], x2)
+            current_line[3] = max(current_line[3], y2)
         else:
             merged_lines.append(current_line)
             current_line = list(boxes[i])
     merged_lines.append(current_line)
     return merged_lines
+def detect_text_lines(image):
+    """Detects text lines and returns the image with bounding boxes."""
     image = Image.fromarray(image)
     original_image = image.copy()
     merged_boxes = merge_boxes_into_lines(detected_boxes)
     draw = ImageDraw.Draw(original_image)
     for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
         draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
         draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
+    return original_image
+# Gradio UI for testing
 with gr.Blocks() as iface:
+    gr.Markdown("# 📜 Text Line Detection with Bounding Boxes")
+    image_input = gr.Image(type="numpy", label="Upload an image")
+    output_image = gr.Image(type="pil", label="Detected Text Lines")
     image_input.upload(
+        detect_text_lines,
         inputs=image_input,
+        outputs=output_image
     )
+# Expose API for Telegram Bot
+api_interface = gr.Interface(
+    fn=detect_text_lines,  # API function that returns the processed image
+    inputs=gr.Image(type="numpy"),
+    outputs="image"
+)
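+# 🚀 Launch UI and API (NOTE(review): launch() blocks the main thread by default, so the second launch only runs after the first server stops; confirm whether prevent_thread_lock=True or a single combined app is intended)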
 if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
+    api_interface.launch(server_name="0.0.0.0", server_port=7861, share=True)