Spaces:

BMukhtar
/

BookRecognitionKz

Sleeping

App Files Files Community

BMukhtar committed on Oct 4, 2024

Commit

29ae74e

1 Parent(s): 2d38cb5

try gpu

Browse files

Files changed (1) hide show

app.py +93 -83

app.py CHANGED Viewed

@@ -84,7 +84,7 @@ for d in dirs:
 font_path = models_dir + "/Ubuntu-Regular.ttf"
 reader = easyocr.Reader(
     ['en'],
-    gpu=False,
     recog_network='best_norm_ED',
     detect_network="craft",
     user_network_directory=models_dir,
@@ -106,14 +106,7 @@ uploaded_file = st.file_uploader("Өз файлыңызды осында жүк
 col1, col2 = st.columns(2)
-#def process_page(page):
-#    image_matrix = fitz.Matrix(fitz.Identity)
-#    pixmap = page.get_pixmap(matrix=image_matrix, dpi=300)
-#    image_data = pixmap.samples# This is a bytes object
-#    image = Image.from("RGB",(pixmap.width, pixmap.height),image_data)
-#    image =  Image.from("RGB", (pixmap.width, pixmap.height), image_data)
-#    result = reader.readtext(np.array(image),paragraph=True)
-#    return image, result
 import time
 max_page = 5
@@ -121,7 +114,7 @@ def recognize_page_image(image):
     start = time.time()
     result = [[0,"Sample 1"],[1,"Sample 2"]]
     result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0)
-    result = get_paragraph(result, y_ths=0, x_ths = 0)
     end = time.time()
     return result,(end-start)
@@ -163,79 +156,96 @@ def process_pdf(uploaded_file):
     #col1.write("</div>",unsafe_allow_html=True)
     progress_bar.progress(0.99,text=f'{min(total_pages,max_page)} бет жүктелді')
-def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
-    # Cluster OCR boxes into groups and join each group's text into one entry.
-    #
-    # raw_result: iterable of items where item[0] is a sequence of (x, y)
-    #   corner points and item[1] is the recognized text.
-    # x_ths, y_ths: margins, expressed as multiples of the current group's
-    #   mean box height, added around a grouped box when testing whether an
-    #   ungrouped box belongs with it.
-    # mode: 'ltr' picks the left-most box first on each line, 'rtl' the
-    #   right-most.
-    # Returns a list of [four-corner bounding box, joined text], one per group.
-    # create basic attributes
-    box_group = []
-    for box in raw_result:
-        all_x = [int(coord[0]) for coord in box[0]]
-        all_y = [int(coord[1]) for coord in box[0]]
-        min_x = min(all_x)
-        max_x = max(all_x)
-        min_y = min(all_y)
-        max_y = max(all_y)
-        height = max_y - min_y
-        # row layout: [text, min_x, max_x, min_y, max_y, height, center_y, group]
-        box_group.append([box[1], min_x, max_x, min_y, max_y, height, 0.5*(min_y+max_y), 0]) # last element indicates group
-    # cluster boxes into paragraph
-    # Each pass of the loop either seeds the current group with the first
-    # ungrouped box or tries to grow the current group; group 0 means "not
-    # yet assigned".
-    current_group = 1
-    while len([box for box in box_group if box[7]==0]) > 0:
-        box_group0 = [box for box in box_group if box[7]==0] # group0 = non-group
-        # new group
-        if len([box for box in box_group if box[7]==current_group]) == 0:
-            box_group0[0][7] = current_group # assign first box to form new group
-        # try to add group
         else:
-            current_box_group = [box for box in box_group if box[7]==current_group]
-            mean_height = np.mean([box[5] for box in current_box_group])
-            # min_gx = min([box[1] for box in current_box_group]) - x_ths*mean_height
-            # max_gx = max([box[2] for box in current_box_group]) + x_ths*mean_height
-            # min_gy = min([box[3] for box in current_box_group]) - y_ths*mean_height
-            # max_gy = max([box[4] for box in current_box_group]) + y_ths*mean_height
-            add_box = False
-            for box in current_box_group:
-                # search window = this grouped box expanded by the thresholds
-                min_gx = box[1] - x_ths*mean_height
-                max_gx = box[2] + x_ths*mean_height
-                min_gy = box[3] - y_ths*mean_height
-                max_gy = box[4] + y_ths*mean_height
-                # NOTE(review): the inner loop reuses the name `box`; the window
-                # above is computed before it starts, so the outer value is not
-                # needed again in this iteration.
-                for box in box_group0:
-                    same_horizontal_level = (min_gx<=box[1]<=max_gx) or (min_gx<=box[2]<=max_gx)
-                    same_vertical_level = (min_gy<=box[6]<=max_gy)
-                    if same_horizontal_level and same_vertical_level:
-                        box[7] = current_group
-                        add_box = True
                         break
-            # cannot add more box, go to next group
-            if add_box==False:
-                current_group += 1
-    # arrange order in paragraph
-    result = []
-    for i in set(box[7] for box in box_group):
-        current_box_group = [box for box in box_group if box[7]==i]
-        mean_height = np.mean([box[5] for box in current_box_group])
-        min_gx = min([box[1] for box in current_box_group])
-        max_gx = max([box[2] for box in current_box_group])
-        min_gy = min([box[3] for box in current_box_group])
-        max_gy = max([box[4] for box in current_box_group])
-        text = ''
-        while len(current_box_group) > 0:
-            # boxes whose vertical center lies within 0.4 * mean height of the
-            # smallest remaining center_y are treated as one line
-            highest = min([box[6] for box in current_box_group])
-            candidates = [box for box in current_box_group if box[6]<highest+0.4*mean_height]
-            # get the far left
-            # NOTE(review): for any mode other than 'ltr'/'rtl' neither branch
-            # assigns best_box before it is used below.
-            if mode == 'ltr':
-                most_left = min([box[1] for box in candidates])
-                for box in candidates:
-                    if box[1] == most_left: best_box = box
-            elif mode == 'rtl':
-                most_right = max([box[2] for box in candidates])
-                for box in candidates:
-                    if box[2] == most_right: best_box = box
-            text += ' '+best_box[0]
-            current_box_group.remove(best_box)
-        # text[1:] drops the leading space added before the first piece
-        result.append([ [[min_gx,min_gy],[max_gx,min_gy],[max_gx,max_gy],[min_gx,max_gy]], text[1:]])
-    return result
 if uploaded_file is not None:
@@ -252,8 +262,8 @@ if uploaded_file is not None:
             image = Image.open(uploaded_file)
             #with open(os.path.join("tempDir",image_file))
             col1.image(image)
-            result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0)
-            result = get_paragraph(result, y_ths=0)
             result_text = "\n\n".join([item[1] for item in result])
             button_group_html = generateButtonGroup(result)
             col2.write(button_group_html, unsafe_allow_html=True)

 font_path = models_dir + "/Ubuntu-Regular.ttf"
 reader = easyocr.Reader(
     ['en'],
+    gpu=True,
     recog_network='best_norm_ED',
     detect_network="craft",
     user_network_directory=models_dir,
 col1, col2 = st.columns(2)
 import time
 max_page = 5
     start = time.time()
     result = [[0,"Sample 1"],[1,"Sample 2"]]
     result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0)
+    result = get_paragraph(result)
     end = time.time()
     return result,(end-start)
     #col1.write("</div>",unsafe_allow_html=True)
     progress_bar.progress(0.99,text=f'{min(total_pages,max_page)} бет жүктелді')
+# One recognized text fragment together with its axis-aligned bounds and the
+# id of the group it has been assigned to.
+class TextBox:
+    def __init__(self, text, coordinates):
+        # text: the recognized string for this box.
+        # coordinates: sequence of (x, y) corner points. Corner order does not
+        # matter here: only the min/max of the x and y values are used.
+        x_coords = [int(coord[0]) for coord in coordinates]
+        y_coords = [int(coord[1]) for coord in coordinates]
+        self.text = text
+        self.min_x = min(x_coords)
+        self.max_x = max(x_coords)
+        self.min_y = min(y_coords)
+        self.max_y = max(y_coords)
+        self.height = self.max_y - self.min_y
+        # vertical midpoint of the box
+        self.center_y = 0.5 * (self.min_y + self.max_y)
+        self.group_id = 0  # Initially ungrouped
+    def __repr__(self):
+        # Debug representation: shows only the text and the assigned group.
+        return f"TextBox(text={self.text}, group_id={self.group_id})"
+# Cluster OCR boxes into groups and join each group's text into one entry.
+#
+# ocr_results: iterable of items where item[0] is a sequence of (x, y) corner
+#   points and item[1] is the recognized text.
+# horizontal_threshold, vertical_threshold: margins, expressed as multiples of
+#   the current group's average box height, added around a grouped box when
+#   testing whether an ungrouped box belongs with it.
+# reading_mode: 'ltr' picks the left-most box first on each line, 'rtl' the
+#   right-most.
+# Returns a list of [four-corner bounding box, paragraph text], one per group.
+def get_paragraph(ocr_results, horizontal_threshold=0.5, vertical_threshold=0.0, reading_mode='ltr'):
+    # Convert raw OCR results into TextBox objects
+    text_boxes = [TextBox(box[1], box[0]) for box in ocr_results]
+    # Group the boxes into paragraphs
+    # Each pass of the loop either seeds the current group with the first
+    # ungrouped box or tries to grow the current group; group_id 0 means
+    # "not yet assigned".
+    current_group_id = 1
+    while any(box.group_id == 0 for box in text_boxes):  # While there are ungrouped boxes
+        ungrouped_boxes = [box for box in text_boxes if box.group_id == 0]
+        # Start a new group if none exists for the current group_id
+        if all(box.group_id != current_group_id for box in text_boxes):
+            ungrouped_boxes[0].group_id = current_group_id  # Assign the first ungrouped box to the new group
         else:
+            # Try to add boxes to the current group
+            current_group_boxes = [box for box in text_boxes if box.group_id == current_group_id]
+            average_height = np.mean([box.height for box in current_group_boxes])
+            added_to_group = False
+            for group_box in current_group_boxes:
+                # Search window = this grouped box expanded by the thresholds
+                min_group_x = group_box.min_x - horizontal_threshold * average_height
+                max_group_x = group_box.max_x + horizontal_threshold * average_height
+                min_group_y = group_box.min_y - vertical_threshold * average_height
+                max_group_y = group_box.max_y + vertical_threshold * average_height
+                for ungrouped_box in ungrouped_boxes:
+                    # A box matches when either of its x edges falls inside the
+                    # window and its vertical center lies inside the window.
+                    horizontally_aligned = (min_group_x <= ungrouped_box.min_x <= max_group_x) or (min_group_x <= ungrouped_box.max_x <= max_group_x)
+                    vertically_aligned = (min_group_y <= ungrouped_box.center_y <= max_group_y)
+                    if horizontally_aligned and vertically_aligned:
+                        ungrouped_box.group_id = current_group_id
+                        added_to_group = True
+                        # Only the scan for this group_box stops; the outer
+                        # loop continues with the next box of the group.
                         break
+            # If no box was added to the current group, move to the next group
+            if not added_to_group:
+                current_group_id += 1
+    # Arrange the text order within each group to form paragraphs
+    paragraphs = []
+    # NOTE(review): groups are visited in set iteration order; nothing here
+    # sorts the resulting paragraphs explicitly.
+    for group_id in set(box.group_id for box in text_boxes):
+        boxes_in_group = [box for box in text_boxes if box.group_id == group_id]
+        average_height = np.mean([box.height for box in boxes_in_group])
+        min_group_x = min([box.min_x for box in boxes_in_group])
+        max_group_x = max([box.max_x for box in boxes_in_group])
+        min_group_y = min([box.min_y for box in boxes_in_group])
+        max_group_y = max([box.max_y for box in boxes_in_group])
+        paragraph_text = ''
+        while boxes_in_group:
+            # Boxes whose vertical center lies within 0.4 * average height of
+            # the smallest remaining center_y are treated as one line.
+            highest_y = min([box.center_y for box in boxes_in_group])
+            line_candidates = [box for box in boxes_in_group if box.center_y < highest_y + 0.4 * average_height]
+            # Determine the left-most or right-most box based on reading mode
+            # NOTE(review): for any reading_mode other than 'ltr'/'rtl' neither
+            # branch assigns selected_box before it is used below.
+            # When several candidates share the extreme x value, the last one
+            # in list order is the one kept by these loops.
+            if reading_mode == 'ltr':
+                left_most_x = min([box.min_x for box in line_candidates])
+                for box in line_candidates:
+                    if box.min_x == left_most_x:
+                        selected_box = box
+            elif reading_mode == 'rtl':
+                right_most_x = max([box.max_x for box in line_candidates])
+                for box in line_candidates:
+                    if box.max_x == right_most_x:
+                        selected_box = box
+            paragraph_text += ' ' + selected_box.text
+            boxes_in_group.remove(selected_box)
+        # Append the bounding box and text for the paragraph
+        paragraphs.append([[[min_group_x, min_group_y], [max_group_x, min_group_y], [max_group_x, max_group_y], [min_group_x, max_group_y]], paragraph_text.strip()])
+    return paragraphs
 if uploaded_file is not None:
             image = Image.open(uploaded_file)
             #with open(os.path.join("tempDir",image_file))
             col1.image(image)
+            result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0, text_threshold=0.3)
+            result = get_paragraph(result)
             result_text = "\n\n".join([item[1] for item in result])
             button_group_html = generateButtonGroup(result)
             col2.write(button_group_html, unsafe_allow_html=True)