Norakneath committed on
Commit
78713c7
·
verified ·
1 Parent(s): 4848287

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -57
app.py CHANGED
@@ -1,24 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from ultralytics import YOLO
3
  from PIL import Image, ImageDraw
4
- import pytesseract
5
- import subprocess
6
-
7
- # Ensure Tesseract OCR is installed and detected
8
- TESSERACT_PATH = "/usr/bin/tesseract"
9
- pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
10
-
11
- def check_tesseract():
12
- """Check if Tesseract is installed and print its version."""
13
- try:
14
- tesseract_version = subprocess.check_output([TESSERACT_PATH, "--version"]).decode("utf-8").split("\n")[0]
15
- print(f"Tesseract Version: {tesseract_version}")
16
- return True
17
- except Exception as e:
18
- print(f"Tesseract not found: {e}")
19
- return False
20
-
21
- # Load YOLO model (ensure best.pt exists in the working directory)
22
  YOLO_MODEL_PATH = "best.pt"
23
  model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
24
 
@@ -34,10 +127,10 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
34
  for i in range(1, len(boxes)):
35
  x1, y1, x2, y2 = boxes[i]
36
 
37
- if abs(y1 - current_line[1]) < y_threshold: # Close enough to the previous line
38
- current_line[0] = min(current_line[0], x1) # Extend left boundary
39
- current_line[2] = max(current_line[2], x2) # Extend right boundary
40
- current_line[3] = max(current_line[3], y2) # Extend bottom boundary
41
  else:
42
  merged_lines.append(current_line)
43
  current_line = list(boxes[i])
@@ -45,8 +138,8 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
45
  merged_lines.append(current_line)
46
  return merged_lines
47
 
48
- def detect_and_ocr(image):
49
- """Detects text lines, draws bounding boxes, and runs OCR if available."""
50
  image = Image.fromarray(image)
51
  original_image = image.copy()
52
 
@@ -57,49 +150,34 @@ def detect_and_ocr(image):
57
  merged_boxes = merge_boxes_into_lines(detected_boxes)
58
 
59
  draw = ImageDraw.Draw(original_image)
60
- extracted_text_lines = []
61
 
62
  for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
63
  draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
64
  draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
65
 
66
- cropped_line = image.crop((x1, y1, x2, y2))
67
-
68
- if check_tesseract(): # If Tesseract is installed, run OCR
69
- try:
70
- ocr_text = pytesseract.image_to_string(cropped_line, lang="khm".strip())
71
- if ocr_text:
72
- extracted_text_lines.append(ocr_text)
73
- except Exception as e:
74
- print(f"OCR failed for line {idx}: {e}")
75
-
76
- full_text = "\n".join(extracted_text_lines) if extracted_text_lines else "⚠️ OCR not available. Showing detected lines only."
77
-
78
- return original_image, full_text
79
 
80
- # Gradio UI
81
  with gr.Blocks() as iface:
82
- gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
83
- gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")
84
-
85
- with gr.Row():
86
- with gr.Column(scale=1):
87
- gr.Markdown("### 📤 Upload Image")
88
- image_input = gr.Image(type="numpy", label="Upload an image")
89
-
90
- with gr.Column(scale=1):
91
- gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
92
- output_annotated = gr.Image(type="pil", label="Detected Text Lines")
93
-
94
- gr.Markdown("### 📝 Extracted Text (OCR Result)")
95
- output_text = gr.Textbox(label="Extracted Text", lines=10)
96
 
97
  image_input.upload(
98
- detect_and_ocr,
99
  inputs=image_input,
100
- outputs=[output_annotated, output_text]
101
  )
102
 
103
- # 🚀 Ensure the app runs properly in Hugging Face Spaces
 
 
 
 
 
 
 
104
  if __name__ == "__main__":
105
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
1
+ # import gradio as gr
2
+ # from ultralytics import YOLO
3
+ # from PIL import Image, ImageDraw
4
+ # import pytesseract
5
+ # import subprocess
6
+
7
+ # # Ensure Tesseract OCR is installed and detected
8
+ # TESSERACT_PATH = "/usr/bin/tesseract"
9
+ # pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
10
+
11
+ # def check_tesseract():
12
+ # """Check if Tesseract is installed and print its version."""
13
+ # try:
14
+ # tesseract_version = subprocess.check_output([TESSERACT_PATH, "--version"]).decode("utf-8").split("\n")[0]
15
+ # print(f"Tesseract Version: {tesseract_version}")
16
+ # return True
17
+ # except Exception as e:
18
+ # print(f"Tesseract not found: {e}")
19
+ # return False
20
+
21
+ # # Load YOLO model (ensure best.pt exists in the working directory)
22
+ # YOLO_MODEL_PATH = "best.pt"
23
+ # model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
24
+
25
+ # def merge_boxes_into_lines(boxes, y_threshold=10):
26
+ # """Merge bounding boxes if they belong to the same text row."""
27
+ # if len(boxes) == 0:
28
+ # return []
29
+
30
+ # boxes = sorted(boxes, key=lambda b: b[1]) # Sort by y-axis (top position)
31
+ # merged_lines = []
32
+ # current_line = list(boxes[0])
33
+
34
+ # for i in range(1, len(boxes)):
35
+ # x1, y1, x2, y2 = boxes[i]
36
+
37
+ # if abs(y1 - current_line[1]) < y_threshold: # Close enough to the previous line
38
+ # current_line[0] = min(current_line[0], x1) # Extend left boundary
39
+ # current_line[2] = max(current_line[2], x2) # Extend right boundary
40
+ # current_line[3] = max(current_line[3], y2) # Extend bottom boundary
41
+ # else:
42
+ # merged_lines.append(current_line)
43
+ # current_line = list(boxes[i])
44
+
45
+ # merged_lines.append(current_line)
46
+ # return merged_lines
47
+
48
+ # def detect_and_ocr(image):
49
+ # """Detects text lines, draws bounding boxes, and runs OCR if available."""
50
+ # image = Image.fromarray(image)
51
+ # original_image = image.copy()
52
+
53
+ # results = model.predict(image, conf=0.1, iou=0.2, device="cpu")
54
+ # detected_boxes = results[0].boxes.xyxy.tolist()
55
+ # detected_boxes = [list(map(int, box)) for box in detected_boxes]
56
+
57
+ # merged_boxes = merge_boxes_into_lines(detected_boxes)
58
+
59
+ # draw = ImageDraw.Draw(original_image)
60
+ # extracted_text_lines = []
61
+
62
+ # for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
63
+ # draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
64
+ # draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
65
+
66
+ # cropped_line = image.crop((x1, y1, x2, y2))
67
+
68
+ # if check_tesseract(): # If Tesseract is installed, run OCR
69
+ # try:
70
+ # ocr_text = pytesseract.image_to_string(cropped_line, lang="khm".strip())
71
+ # if ocr_text:
72
+ # extracted_text_lines.append(ocr_text)
73
+ # except Exception as e:
74
+ # print(f"OCR failed for line {idx}: {e}")
75
+
76
+ # full_text = "\n".join(extracted_text_lines) if extracted_text_lines else "⚠️ OCR not available. Showing detected lines only."
77
+
78
+ # return original_image, full_text
79
+
80
+ # # Gradio UI
81
+ # with gr.Blocks() as iface:
82
+ # gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
83
+ # gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")
84
+
85
+ # with gr.Row():
86
+ # with gr.Column(scale=1):
87
+ # gr.Markdown("### 📤 Upload Image")
88
+ # image_input = gr.Image(type="numpy", label="Upload an image")
89
+
90
+ # with gr.Column(scale=1):
91
+ # gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
92
+ # output_annotated = gr.Image(type="pil", label="Detected Text Lines")
93
+
94
+ # gr.Markdown("### 📝 Extracted Text (OCR Result)")
95
+ # output_text = gr.Textbox(label="Extracted Text", lines=10)
96
+
97
+ # image_input.upload(
98
+ # detect_and_ocr,
99
+ # inputs=image_input,
100
+ # outputs=[output_annotated, output_text]
101
+ # )
102
+
103
+ # # 🚀 Ensure the app runs properly in Hugging Face Spaces
104
+ # if __name__ == "__main__":
105
+ # iface.launch(server_name="0.0.0.0", server_port=7860)
106
+
107
+
108
  import gradio as gr
109
  from ultralytics import YOLO
110
  from PIL import Image, ImageDraw
111
+ import numpy as np
112
+ import io
113
+
114
+ # Load YOLO model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  YOLO_MODEL_PATH = "best.pt"
116
  model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
117
 
 
127
  for i in range(1, len(boxes)):
128
  x1, y1, x2, y2 = boxes[i]
129
 
130
+ if abs(y1 - current_line[1]) < y_threshold:
131
+ current_line[0] = min(current_line[0], x1)
132
+ current_line[2] = max(current_line[2], x2)
133
+ current_line[3] = max(current_line[3], y2)
134
  else:
135
  merged_lines.append(current_line)
136
  current_line = list(boxes[i])
 
138
  merged_lines.append(current_line)
139
  return merged_lines
140
 
141
+ def detect_text_lines(image):
142
+ """Detects text lines and returns the image with bounding boxes."""
143
  image = Image.fromarray(image)
144
  original_image = image.copy()
145
 
 
150
  merged_boxes = merge_boxes_into_lines(detected_boxes)
151
 
152
  draw = ImageDraw.Draw(original_image)
 
153
 
154
  for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
155
  draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
156
  draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
157
 
158
+ return original_image
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
+ # Gradio UI for testing
161
  with gr.Blocks() as iface:
162
+ gr.Markdown("# 📜 Text Line Detection with Bounding Boxes")
163
+ image_input = gr.Image(type="numpy", label="Upload an image")
164
+ output_image = gr.Image(type="pil", label="Detected Text Lines")
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  image_input.upload(
167
+ detect_text_lines,
168
  inputs=image_input,
169
+ outputs=output_image
170
  )
171
 
172
+ ### **Expose API for Telegram Bot**
173
+ api_interface = gr.Interface(
174
+ fn=detect_text_lines, # API function that returns the processed image
175
+ inputs=gr.Image(type="numpy"),
176
+ outputs="image"
177
+ )
178
+
179
+ # 🚀 Launch UI and API
180
  if __name__ == "__main__":
181
+ iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
182
+ api_interface.launch(server_name="0.0.0.0", server_port=7861, share=True)
183
+