import numpy as np

# def detect_para(bbox_dict):
#     alpha1 = 0.2
#     alpha2 = 0.7
#     beta1 = 0.4
#     data = bbox_dict
#     word_crops = list(data.keys())
#     for i in word_crops:
#         data[i]["x1"], data[i]["y1"], data[i]["x2"], data[i]["y2"] = data[i]["bbox"]
#         data[i]["xc"] = (data[i]["x1"] + data[i]["x2"]) / 2
#         data[i]["yc"] = (data[i]["y1"] + data[i]["y2"]) / 2
#         data[i]["w"] = data[i]["x2"] - data[i]["x1"]
#         data[i]["h"] = data[i]["y2"] - data[i]["y1"]

#     patch_info = {}
#     while word_crops:
#         img_name = word_crops[0].split("_")[0]
#         word_crop_collection = [
#             word_crop for word_crop in word_crops if word_crop.startswith(img_name)
#         ]
#         centroids = {}
#         lines = []
#         img_word_crops = word_crop_collection.copy()
#         para = []
#         while img_word_crops:
#             clusters = []
#             para_words_group = [
#                 img_word_crops[0],
#             ]
#             added = [
#                 img_word_crops[0],
#             ]
#             img_word_crops.remove(img_word_crops[0])
#             ## determining the paragraph
#             while added:
#                 word_crop = added.pop()
#                 for i in range(len(img_word_crops)):
#                     word_crop_ = img_word_crops[i]
#                     if (
#                         abs(data[word_crop_]["yc"] - data[word_crop]["yc"])
#                         < data[word_crop]["h"] * alpha1
#                     ):
#                         if data[word_crop]["xc"] > data[word_crop_]["xc"]:
#                             if (data[word_crop]["x1"] - data[word_crop_]["x2"]) < data[
#                                 word_crop
#                             ]["h"] * alpha2:
#                                 para_words_group.append(word_crop_)
#                                 added.append(word_crop_)
#                         else:
#                             if (data[word_crop_]["x1"] - data[word_crop]["x2"]) < data[
#                                 word_crop
#                             ]["h"] * alpha2:
#                                 para_words_group.append(word_crop_)
#                                 added.append(word_crop_)
#                     else:
#                         if data[word_crop]["yc"] > data[word_crop_]["yc"]:
#                             if (data[word_crop]["y1"] - data[word_crop_]["y2"]) < data[
#                                 word_crop
#                             ]["h"] * beta1 and (
#                                 (
#                                     (data[word_crop_]["x1"] < data[word_crop]["x2"])
#                                     and (data[word_crop_]["x1"] > data[word_crop]["x1"])
#                                 )
#                                 or (
#                                     (data[word_crop_]["x2"] < data[word_crop]["x2"])
#                                     and (data[word_crop_]["x2"] > data[word_crop]["x1"])
#                                 )
#                                 or (
#                                     (data[word_crop]["x1"] > data[word_crop_]["x1"])
#                                     and (data[word_crop]["x2"] < data[word_crop_]["x2"])
#                                 )
#                             ):
#                                 para_words_group.append(word_crop_)
#                                 added.append(word_crop_)
#                         else:
#                             if (data[word_crop_]["y1"] - data[word_crop]["y2"]) < data[
#                                 word_crop
#                             ]["h"] * beta1 and (
#                                 (
#                                     (data[word_crop_]["x1"] < data[word_crop]["x2"])
#                                     and (data[word_crop_]["x1"] > data[word_crop]["x1"])
#                                 )
#                                 or (
#                                     (data[word_crop_]["x2"] < data[word_crop]["x2"])
#                                     and (data[word_crop_]["x2"] > data[word_crop]["x1"])
#                                 )
#                                 or (
#                                     (data[word_crop]["x1"] > data[word_crop_]["x1"])
#                                     and (data[word_crop]["x2"] < data[word_crop_]["x2"])
#                                 )
#                             ):
#                                 para_words_group.append(word_crop_)
#                                 added.append(word_crop_)
#                 img_word_crops = [p for p in img_word_crops if p not in para_words_group]
#             ## processing for the line
#             while para_words_group:
#                 line_words_group = [
#                     para_words_group[0],
#                 ]
#                 added = [
#                     para_words_group[0],
#                 ]
#                 para_words_group.remove(para_words_group[0])
#                 ## determining the line
#                 while added:
#                     word_crop = added.pop()
#                     for i in range(len(para_words_group)):
#                         word_crop_ = para_words_group[i]
#                         if (
#                             abs(data[word_crop_]["yc"] - data[word_crop]["yc"])
#                             < data[word_crop]["h"] * alpha1
#                         ):
#                             if data[word_crop]["xc"] > data[word_crop_]["xc"]:
#                                 if (data[word_crop]["x1"] - data[word_crop_]["x2"]) < data[
#                                     word_crop
#                                 ]["h"] * alpha2:
#                                     line_words_group.append(word_crop_)
#                                     added.append(word_crop_)
#                             else:
#                                 if (data[word_crop_]["x1"] - data[word_crop]["x2"]) < data[
#                                     word_crop
#                                 ]["h"] * alpha2:
#                                     line_words_group.append(word_crop_)
#                                     added.append(word_crop_)
#                     para_words_group = [
#                         p for p in para_words_group if p not in line_words_group
#                     ]
#                 xc = [data[word_crop]["xc"] for word_crop in line_words_group]
#                 idxs = np.argsort(xc)
#                 patch_cluster_ = [line_words_group[i] for i in idxs]
#                 line_words_group = patch_cluster_
#                 x1 = [data[word_crop]["x1"] for word_crop in line_words_group]
#                 x2 = [data[word_crop]["x2"] for word_crop in line_words_group]
#                 y1 = [data[word_crop]["y1"] for word_crop in line_words_group]
#                 y2 = [data[word_crop]["y2"] for word_crop in line_words_group]
#                 txt_line = [data[word_crop]["txt"] for word_crop in line_words_group]
#                 txt = " ".join(txt_line)
#                 x = [x1[0]]
#                 y1_ = [y1[0]]
#                 y2_ = [y2[0]]
#                 l = [len(txt_l) for txt_l in txt_line]
#                 for i in range(1, len(x1)):
#                     x.append((x1[i] + x2[i - 1]) / 2)
#                     y1_.append((y1[i] + y1[i - 1]) / 2)
#                     y2_.append((y2[i] + y2[i - 1]) / 2)
#                 x.append(x2[-1])
#                 y1_.append(y1[-1])
#                 y2_.append(y2[-1])
#                 line_info = {
#                     "x": x,
#                     "y1": y1_,
#                     "y2": y2_,
#                     "l": l,
#                     "txt": txt,
#                     "word_crops": line_words_group,
#                 }
#                 clusters.append(line_info)
#             y_ = [clusters[i]["y1"][0] for i in range(len(clusters))]
#             idxs = np.argsort(y_)
#             clusters_ = [clusters[i] for i in idxs]
#             txt = [clusters[i]["txt"] for i in idxs]
#             l = [len(t) for t in txt]
#             txt = " ".join(txt)
#             para_info = {"lines": clusters_, "l": l, "txt": txt}
#             para.append(para_info)

#         for word_crop in word_crop_collection:
#             word_crops.remove(word_crop)
#         return "\n".join([para[i]["txt"] for i in range(len(para))])


def detect_para(recognized_texts, overlap_threshold=0.4):
    """
    Group recognized words into text lines based on vertical overlap of their boxes.

    Lines are built greedily: the first not-yet-assigned word (in the
    dictionary's iteration order) seeds a line, and every remaining word whose
    bounding box vertically overlaps the *seed's* box by more than
    ``overlap_threshold`` joins that line. Words are compared against the seed
    only, not against other words already placed in the line.

    Args:
        recognized_texts (dict): Maps an arbitrary key to a dict holding at
            least ``'txt'`` (the recognized text) and ``'bbox'`` (a sequence
            ``[x1, y1, x2, y2]``). ``None`` or an empty dict yields ``[]``.
        overlap_threshold (float): Minimum vertical-overlap ratio (overlap
            divided by the smaller of the two box heights) that a word must
            strictly exceed to join the seed's line. Defaults to 0.4.

    Returns:
        list: A list of lines, each a list of word texts ordered by ``x1``.
            Lines appear in the order their seed word was encountered in
            ``recognized_texts``; they are not re-sorted by vertical position.
    """
    def vertical_overlap_ratio(bbox1, bbox2):
        """Return the shared vertical extent as a fraction of the shorter box."""
        _, top1, _, bottom1 = bbox1
        _, top2, _, bottom2 = bbox2

        shared = max(0, min(bottom1, bottom2) - max(top1, top2))
        shorter_height = min(bottom1 - top1, bottom2 - top2)
        # A degenerate (zero or negative height) box can never overlap.
        return shared / shorter_height if shorter_height > 0 else 0

    if not recognized_texts:
        return []

    # Only the text and the box matter for grouping; the dict keys are unused.
    pending = [(entry['txt'], entry['bbox']) for entry in recognized_texts.values()]
    lines = []

    while pending:
        seed, *candidates = pending
        seed_bbox = seed[1]
        line = [seed]

        pending = []
        for word in candidates:
            if vertical_overlap_ratio(seed_bbox, word[1]) > overlap_threshold:
                line.append(word)
            else:
                # Not on the seed's line; keep it for a later pass.
                pending.append(word)

        lines.append(line)

    # Order the words of each line left to right by x1. sorted() is stable, so
    # words sharing the same x1 keep their encounter order.
    return [
        [text for text, _ in sorted(line, key=lambda word: word[1][0])]
        for line in lines
    ]