Norakneath committed on
Commit
8e8bfd2
·
verified ·
1 Parent(s): dcfa431

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -27
app.py CHANGED
@@ -108,10 +108,24 @@
108
  import gradio as gr
109
  from ultralytics import YOLO
110
  from PIL import Image, ImageDraw
111
- import numpy as np
112
- import io
113
-
114
# Load the YOLO line-detection weights and pin inference to the CPU.
# NOTE(review): assumes best.pt sits in the working directory — confirm at deploy time.
YOLO_MODEL_PATH = "best.pt"
model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
117
 
@@ -127,10 +141,10 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
127
  for i in range(1, len(boxes)):
128
  x1, y1, x2, y2 = boxes[i]
129
 
130
- if abs(y1 - current_line[1]) < y_threshold:
131
- current_line[0] = min(current_line[0], x1)
132
- current_line[2] = max(current_line[2], x2)
133
- current_line[3] = max(current_line[3], y2)
134
  else:
135
  merged_lines.append(current_line)
136
  current_line = list(boxes[i])
@@ -138,46 +152,66 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
138
  merged_lines.append(current_line)
139
  return merged_lines
140
 
141
def detect_text_lines(image):
    """Detect text lines in *image* and return a copy annotated with bounding boxes."""
    pil_image = Image.fromarray(image)
    annotated = pil_image.copy()

    # CPU inference with permissive conf/iou thresholds so faint lines survive.
    results = model.predict(pil_image, conf=0.1, iou=0.2, device="cpu")
    raw_boxes = [list(map(int, box)) for box in results[0].boxes.xyxy.tolist()]

    # Collapse fragment boxes that share a baseline into whole-line boxes.
    line_boxes = merge_boxes_into_lines(raw_boxes)

    drawer = ImageDraw.Draw(annotated)
    for idx, (x1, y1, x2, y2) in enumerate(line_boxes):
        drawer.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        drawer.text((x1, y1 - 10), f"Line {idx}", fill="blue")

    return annotated
 
 
 
 
 
 
 
 
 
 
159
 
160
# Interactive UI for manual testing.
with gr.Blocks() as iface:
    gr.Markdown("# 📜 Text Line Detection with Bounding Boxes")
    image_input = gr.Image(type="numpy", label="Upload an image")
    output_image = gr.Image(type="pil", label="Detected Text Lines")

    # Re-run detection whenever a new image is uploaded.
    image_input.upload(
        fn=detect_text_lines,
        inputs=image_input,
        outputs=output_image,
    )

# Plain Interface exposed so external clients (e.g. a Telegram bot) can call
# the detector as an API.
api_interface = gr.Interface(
    fn=detect_text_lines,
    inputs=gr.Image(type="numpy"),
    outputs="image",
)

# 🚀 Serve the UI and the API on separate ports.
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
    api_interface.launch(server_name="0.0.0.0", server_port=7861, share=True)
183
 
 
108
  import gradio as gr
109
  from ultralytics import YOLO
110
  from PIL import Image, ImageDraw
111
+ import pytesseract
112
+ import subprocess
113
+
114
# Point pytesseract at the system Tesseract binary so OCR calls resolve it.
# NOTE(review): assumes the host installs tesseract at /usr/bin — confirm per deployment.
TESSERACT_PATH = "/usr/bin/tesseract"
pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
117
+
118
def check_tesseract(tesseract_path: str = "/usr/bin/tesseract") -> bool:
    """Return True if a runnable Tesseract binary exists at *tesseract_path*.

    Args:
        tesseract_path: Path to the Tesseract executable.  Defaults to the same
            ``/usr/bin/tesseract`` location as the module-level TESSERACT_PATH,
            so existing ``check_tesseract()`` callers behave exactly as before.

    Returns:
        bool: True when ``--version`` runs successfully, False otherwise.
        The detected version (or the failure reason) is printed as a startup
        diagnostic side effect.
    """
    try:
        # `--version` prints a multi-line banner; keep only the first line.
        banner = subprocess.check_output([tesseract_path, "--version"]).decode("utf-8")
        tesseract_version = banner.split("\n")[0]
        print(f"Tesseract Version: {tesseract_version}")
        return True
    # OSError covers a missing/non-executable binary (FileNotFoundError,
    # PermissionError); CalledProcessError covers a binary that starts but fails.
    except (OSError, subprocess.CalledProcessError) as e:
        print(f"Tesseract not found: {e}")
        return False
127
+
128
# Load the YOLO line-detection weights (best.pt must exist in the working
# directory) and pin inference to the CPU.
YOLO_MODEL_PATH = "best.pt"
model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
131
 
 
141
  for i in range(1, len(boxes)):
142
  x1, y1, x2, y2 = boxes[i]
143
 
144
+ if abs(y1 - current_line[1]) < y_threshold: # Close enough to the previous line
145
+ current_line[0] = min(current_line[0], x1) # Extend left boundary
146
+ current_line[2] = max(current_line[2], x2) # Extend right boundary
147
+ current_line[3] = max(current_line[3], y2) # Extend bottom boundary
148
  else:
149
  merged_lines.append(current_line)
150
  current_line = list(boxes[i])
 
152
  merged_lines.append(current_line)
153
  return merged_lines
154
 
155
def detect_and_ocr(image):
    """Detect text lines with YOLO, draw their boxes, and OCR the whole image.

    Args:
        image: Input image as a numpy array (as delivered by
            ``gr.Image(type="numpy")``).

    Returns:
        tuple: ``(annotated, text)`` where *annotated* is a PIL image with blue
        line boxes drawn on it and *text* is the Khmer OCR result (stripped).
        *text* is "⚠️ OCR not available." only when Tesseract is missing or the
        OCR call fails — an empty string means OCR ran but found no text
        (the previous version misreported that case as "not available").
    """
    pil_image = Image.fromarray(image)
    annotated = pil_image.copy()

    # YOLO text-line detection on CPU; low conf/iou thresholds keep faint or
    # overlapping lines.
    results = model.predict(pil_image, conf=0.1, iou=0.2, device="cpu")
    detected_boxes = [list(map(int, box)) for box in results[0].boxes.xyxy.tolist()]

    # Collapse fragment boxes sharing a baseline into whole-line boxes.
    merged_boxes = merge_boxes_into_lines(detected_boxes)

    # Annotate the copy so OCR still sees the clean original image.
    draw = ImageDraw.Draw(annotated)
    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")

    # Whole-image Khmer OCR.  check_tesseract() shells out to the binary, so the
    # probe runs once per request; any failure degrades to a warning message
    # instead of crashing the UI.
    full_text = "⚠️ OCR not available."
    if check_tesseract():
        try:
            # strip() so a whitespace-only result is not mistaken for real text.
            full_text = pytesseract.image_to_string(pil_image, lang="khm").strip()
        except Exception as e:
            # Best-effort: keep the warning text and log the cause.
            print(f"OCR failed: {e}")

    return annotated, full_text
189
+
190
# Gradio UI: upload pane on the left, annotated result on the right,
# extracted text below.
with gr.Blocks() as iface:
    gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
    gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Image")
            image_input = gr.Image(type="numpy", label="Upload an image")

        with gr.Column(scale=1):
            gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
            output_annotated = gr.Image(type="pil", label="Detected Text Lines")

    gr.Markdown("### 📝 Extracted Text (OCR Result)")
    output_text = gr.Textbox(label="Extracted Text", lines=10)

    # Run detection + OCR on every new upload; fills both output widgets.
    image_input.upload(
        fn=detect_and_ocr,
        inputs=image_input,
        outputs=[output_annotated, output_text],
    )
212
 
213
# 🚀 Entry point — bind to 0.0.0.0 so the app is reachable from outside the
# container (e.g. in Hugging Face Spaces).
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
217