MnLgt committed
Commit f725299 · 1 Parent(s): a34e3aa
.gitignore CHANGED
@@ -1,3 +1,3 @@
  */partially_signed_agreement_1.png
 
- */**.pyc
+ *.pyc
app.py ADDED
@@ -0,0 +1,175 @@
+ """
+ This script creates a Gradio GUI for detecting and classifying signature blocks in document images
+ using the SignatureBlockModel. It loads example images from the /assets directory, displays
+ bounding boxes in the result image, and shows cropped signature blocks with labels in a separate view.
+ """
+
+ import gradio as gr
+ import numpy as np
+ from PIL import Image
+ import matplotlib.pyplot as plt
+ import io
+ from typing import Tuple
+ import os
+
+ from scripts.signature_blocks import SignatureBlockModel
+
+ ASSETS_DIR = os.path.join(os.path.dirname(__file__), "assets")
+
+
+ def process_image(image: np.ndarray) -> Tuple[np.ndarray, str, np.ndarray]:
+     """
+     Process an input image using the SignatureBlockModel.
+
+     Args:
+         image (np.ndarray): Input image as a numpy array.
+
+     Returns:
+         Tuple[np.ndarray, str, np.ndarray]: Processed image, status, and signature crops image.
+     """
+     # Convert numpy array to PIL Image
+     pil_image = Image.fromarray(image)
+
+     # Initialize the model
+     model = SignatureBlockModel(pil_image)
+
+     # Get processed image with boxes
+     image_with_boxes = model.draw_boxes()
+
+     # Get signature crops
+     signature_crops = create_signature_crops(model)
+
+     # Determine status: all SIGNED_BLOCK (label 1) means fully executed,
+     # all UNSIGNED_BLOCK (label 2) or no detections means unsigned,
+     # and a mix of the two means partially executed.
+     labels = model.get_labels()
+     if not labels.any():
+         status = "Unsigned"
+     elif all(label == 1 for label in labels):
+         status = "Fully Executed"
+     elif all(label == 2 for label in labels):
+         status = "Unsigned"
+     else:
+         status = "Partially Executed"
+
+     return np.array(image_with_boxes), status, signature_crops
+
+
+ def resize_crop(crop: np.ndarray, factor: float = 0.5) -> np.ndarray:
+     """
+     Resize a crop by a scale factor.
+
+     Args:
+         crop (np.ndarray): Input crop as a numpy array.
+         factor (float): Scale factor applied to both dimensions.
+
+     Returns:
+         np.ndarray: Resized crop.
+     """
+     crop_image = Image.fromarray(crop).convert("RGB")
+     crop_size = crop_image.size
+     target_size = tuple(int(dim * factor) for dim in crop_size)
+     crop_image = crop_image.resize(target_size)
+     return np.array(crop_image)
+
+
+ def create_signature_crops(model: SignatureBlockModel) -> np.ndarray:
+     """
+     Create an image with stacked signature crops and labels.
+
+     Args:
+         model (SignatureBlockModel): The initialized SignatureBlockModel.
+
+     Returns:
+         np.ndarray: Image with stacked signature crops and labels.
+     """
+     boxes = model.get_boxes()
+     scores = model.get_scores()
+     labels = model.get_labels()
+     classes = model.classes
+
+     # Create a figure with the correct number of subplots
+     fig, axes = plt.subplots(len(boxes), 2, figsize=(10, 3 * len(boxes)))
+
+     # Ensure axes is always a 2D array, even with only one box
+     if len(boxes) == 1:
+         axes = axes.reshape(1, -1)
+
+     for (ax_label, ax_image), box, label, score in zip(axes, boxes, labels, scores):
+         crop = model.extract_box(box)
+         crop = resize_crop(crop, 0.7)
+
+         # Set background color to black for both subplots
+         ax_label.set_facecolor("black")
+         ax_image.set_facecolor("black")
+
+         # Add label text
+         label_text = f"Label: {classes[label]}\nScore: {score:.2f}"
+         ax_label.text(
+             0.05,
+             0.5,
+             label_text,
+             color="white",
+             fontsize=12,
+             verticalalignment="center",
+             horizontalalignment="left",
+         )
+         ax_label.axis("off")
+
+         # Display the crop
+         ax_image.imshow(crop)
+         ax_image.axis("off")
+
+     plt.tight_layout()
+
+     # Convert the matplotlib figure to a PNG image
+     buf = io.BytesIO()
+     plt.savefig(buf, format="png", facecolor="black", edgecolor="none")
+     buf.seek(0)
+     signature_crops = np.array(Image.open(buf))
+     plt.close(fig)
+
+     return signature_crops
+
+
+ def load_examples():
+     """
+     Load example images from the /assets directory.
+
+     Returns:
+         List[List[str]]: List of example image paths.
+     """
+     examples = []
+     for filename in os.listdir(ASSETS_DIR):
+         if filename.lower().endswith((".png", ".jpg", ".jpeg")):
+             examples.append([os.path.join(ASSETS_DIR, filename)])
+     return examples
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Signature Block Detection")
+     gr.Markdown("Upload a document image to detect and classify signature blocks.")
+
+     with gr.Row():
+         input_image = gr.Image(label="Upload Document Image")
+         output_image = gr.Image(label="Processed Image")
+
+     with gr.Row():
+         status_box = gr.Textbox(label="Document Status")
+         signature_crops = gr.Image(label="Signature Crops")
+
+     process_btn = gr.Button("Process Image")
+
+     examples = gr.Examples(
+         examples=load_examples(),
+         inputs=input_image,
+     )
+
+     process_btn.click(
+         fn=process_image,
+         inputs=input_image,
+         outputs=[output_image, status_box, signature_crops],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
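Since process_image is a plain function, it can be smoke-tested without serving the UI. A minimal sketch, assuming it is run from the repo root against one of the example assets added in this commit:

    # Headless check of process_image; importing app builds the Blocks UI
    # but does not launch it (launch() is guarded by __main__).
    import numpy as np
    from PIL import Image

    from app import process_image

    img = np.array(Image.open("assets/signed_agreement_1.jpg").convert("RGB"))
    boxed, status, crops = process_image(img)
    print(status)  # "Fully Executed", "Partially Executed", or "Unsigned"
    Image.fromarray(boxed).save("boxed.png")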
assets/signed_agreement_1.jpg ADDED
assets/signed_agreement_2.png ADDED
assets/signed_agreement_3.jpg ADDED
assets/unsigned_agreement_1.jpg ADDED
assets/unsigned_agreement_2.jpg ADDED
assets/unsigned_agreement_3.png ADDED
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio==4.44.0
+ matplotlib==3.8.4
+ numpy<2  # torch 2.0.1 wheels are built against NumPy 1.x
+ Pillow==10.4.0
+ torch==2.0.1
+ torchvision==0.15.2
+ opencv-python
scripts/execution_status.py ADDED
@@ -0,0 +1,54 @@
+ from typing import List, Any, Tuple
+
+ from scripts.signature_blocks import SignatureBlockModel
+
+
+ def flatten_list(xss: List[List[Any]]) -> List[Any]:
+     return [x for xs in xss for x in xs]
+
+
+ def agreement_status(labels: List[str]) -> str:
+     if labels:
+         if len(set(labels)) > 1:
+             return "Partially Executed"
+         elif list(set(labels))[0] == "SIGNED_BLOCK":
+             return "Fully Executed"
+         elif list(set(labels))[0] == "UNSIGNED_BLOCK":
+             return "Unsigned"
+     return "Unknown"
+
+
+ def execution_status(
+     images: List[Any], show: bool = False
+ ) -> Tuple[int, str, List[Any], List[Any]]:
+     if isinstance(images, list):
+         labels = []
+         boxes = []
+         crops = []
+         for page in images:
+             model = SignatureBlockModel(page)
+             if model.predictions[0]["boxes"].shape[0] > 0:
+                 page_labels = model.get_labels_names()
+                 labels.append(page_labels)
+                 boxes.extend(model.get_boxes())
+                 crops.extend(model.get_box_crops())
+                 if show:
+                     model.show_boxes()
+         num_sig_pages = len(labels)
+         status = agreement_status(flatten_list(labels))
+         return num_sig_pages, status, boxes, crops
+     else:
+         return None, None, None, None
+
+
+ if __name__ == "__main__":
+     from gabriel.parsers.pdf_parser import ParsePDF
+
+     filepath = "/Users/jordandavis/GitHub/gabriel/gabriel/datasets/MASTER_REVIEWED/SIGNATURE_PAGE/1a90afa457f328fc7f560d9b49af7b8f.pdf"
+     images = list(ParsePDF(filepath).yield_image())
+
+     num_sig_pages, status, boxes, crops = execution_status(images)
+     print(f"Num Sig Pages: {num_sig_pages}")
+     print(f"Status: {status}")
+     print(f"Boxes: {boxes}")
+     print(f"Crops: {crops}")
scripts/signature_blocks.py ADDED
@@ -0,0 +1,211 @@
+ import os
+ import cv2
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import torch
+ from PIL import Image
+ from torchvision import models
+ from torchvision import transforms as T
+ from torchvision.ops import nms
+ from typing import Any
+
+ STATE_DICT = os.path.join(
+     os.path.dirname(__file__), "..", "state_dicts", "signature_blocks_v14.pth"
+ )
+
+
+ def get_device():
+     if torch.cuda.is_available():
+         device = "cuda"
+
+     # 'aten::hardsigmoid.out' is not currently implemented for the MPS device,
+     # and setting the fallback does not work either, so MPS stays disabled:
+     # elif torch.backends.mps.is_built():
+     #     device = "mps"
+     else:
+         device = "cpu"
+     return device
+
+
+ class ImgFactory:
+     """Normalizes model input: file paths are opened with PIL, images pass through."""
+
+     def serialize(self, img: Any) -> Any:
+         serializer = self._get_serializer(img)
+         return serializer(img)
+
+     def _get_serializer(self, img: Any) -> Any:
+         if isinstance(img, str):
+             return self._serialize_string_to_image
+         else:
+             return self._serialize_image_to_image
+
+     def _serialize_string_to_image(self, img):
+         return Image.open(img)
+
+     def _serialize_image_to_image(self, img):
+         return img
+
+
+ class SignatureBlockModel(ImgFactory):
+     def __init__(self, img, state_dict_path=STATE_DICT):
+         self.state_dict_path = state_dict_path
+         self.classes = {0: "NOTHING", 1: "SIGNED_BLOCK", 2: "UNSIGNED_BLOCK"}
+         self.n_classes = len(self.classes)
+         self.device = get_device()
+         self.model = self._load_model()
+         self.img = self.serialize(img)
+
+         with torch.no_grad():
+             self.model.eval()
+             self.predictions = self._get_prediction()
+
+     def _load_model(self):
+         weights = models.detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT
+         model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=weights)
+         # Swap the classification head for one sized to our three classes
+         in_features = model.roi_heads.box_predictor.cls_score.in_features
+
+         model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(
+             in_features, self.n_classes
+         )
+
+         model.load_state_dict(
+             torch.load(self.state_dict_path, map_location=self.device)
+         )
+
+         return model.to(self.device)
+
+     def filter_overlap(self, predictions, iou_threshold=0.3):
+         boxes = predictions[0]["boxes"]
+         scores = predictions[0]["scores"]
+         nms_filter = nms(boxes=boxes, scores=scores, iou_threshold=iou_threshold)
+         return nms_filter
+
+     def filter_scores(self, predictions, score_thrs=0.94):
+         nms_filter = self.filter_overlap(predictions)
+         boxes = predictions[0]["boxes"]
+         scores = predictions[0]["scores"]
+         labels = predictions[0]["labels"]
+
+         score_filter = scores[nms_filter] > score_thrs
+         boxes = boxes[nms_filter][score_filter]
+         scores = scores[nms_filter][score_filter]
+         labels = labels[nms_filter][score_filter]
+         return boxes, scores, labels
+
+     def _get_prediction(self):
+         transform = T.Compose([T.ToTensor()])
+         img = transform(self.img)
+         img = img.to(self.device)
+         predictions = self.model([img])
+         boxes, scores, labels = self.filter_scores(predictions)
+         return [{"boxes": boxes, "scores": scores, "labels": labels}]
+
+     def get_boxes(self):
+         # Reuse the predictions computed once in __init__
+         boxes = self.predictions[0]["boxes"].cpu().detach().numpy()
+         int_boxes = []
+         for box in boxes:
+             box = [int(x) for x in box]
+             int_boxes.append(box)
+         return int_boxes
+
+     def get_scores(self):
+         scores = self.predictions[0]["scores"].cpu().detach().numpy()
+         return scores
+
+     def get_labels(self):
+         labels = self.predictions[0]["labels"].cpu().detach().numpy()
+         return labels
+
+     def get_labels_names(self):
+         labels = self.get_labels()
+         label_names = [self.classes[label] for label in labels]
+         return label_names
+
+     def _get_prediction_dict(self):
+         boxes = self.get_boxes()
+         scores = self.get_scores()
+         labels = self.get_labels()
+         return {"boxes": boxes, "scores": scores, "labels": labels}
+
+     def _signature_crops(self, show=True):
+         boxes = self.get_boxes()
+         scores = self.get_scores()
+         labels = self.get_labels()
+         signature_crops = []
+         for box, label, score in zip(boxes, labels, scores):
+             crop = self.extract_box(box)
+             if show:
+                 plt.imshow(crop)
+                 plt.show()
+             signature_crops.append(crop)
+         return signature_crops
+
+     def get_prediction(self):
+         return self._get_prediction_dict()
+
+     def get_image(self):
+         return self.img
+
+     def get_image_array(self):
+         return np.array(self.img)
+
+     def get_box_crops(self):
+         boxes = self.get_boxes()
+         box_crops = []
+         for box in boxes:
+             crop = self.img.crop(box)
+             box_crops.append(crop)
+         return box_crops
+
+     def extract_box(self, box):
+         xmin, ymin, xmax, ymax = box
+         image = np.array(self.img)
+         return image[ymin:ymax, xmin:xmax]
+
+     def show_boxes(self):
+         boxes = self.get_boxes()
+         scores = self.get_scores()
+         labels = self.get_labels()
+         box_crops = []
+         for box, label, score in zip(boxes, labels, scores):
+             print(f"Status: {self.classes[label]}")
+             print(f"Score: {score}")
+             crop = self.extract_box(box)
+             plt.imshow(crop)
+             plt.show()
+             plt.close()
+             box_crops.append(crop)
+         return box_crops
+
+     def draw_boxes(self):
+         img = np.array(self.img)
+         boxes = self.get_boxes()
+         labels = self.get_labels()
+         thickness = 2
+         overlay = img.copy()
+         for box, label in zip(boxes, labels):
+             if label == 2:
+                 color = (0, 0, 255)  # red (in BGR order)
+             else:
+                 color = (0, 255, 0)  # green
+             cv2.rectangle(
+                 overlay, (box[0], box[1]), (box[2], box[3]), color, -1
+             )  # Filled rectangle
+
+         alpha = 0.4  # Transparency factor
+         image_boxes = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)
+
+         # Draw box outlines
+         for box, label in zip(boxes, labels):
+             if label == 2:
+                 color = (0, 0, 255)  # red (in BGR order)
+             else:
+                 color = (0, 255, 0)  # green
+             cv2.rectangle(
+                 image_boxes, (box[0], box[1]), (box[2], box[3]), color, thickness
+             )
+
+         return Image.fromarray(cv2.cvtColor(image_boxes, cv2.COLOR_BGR2RGB))
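Putting the class together, a minimal usage sketch, assuming the repo root as working directory and the LFS state dict below checked out:

    from scripts.signature_blocks import SignatureBlockModel

    # Accepts a file path or a PIL image (see ImgFactory.serialize)
    model = SignatureBlockModel("assets/unsigned_agreement_1.jpg")

    print(model.get_labels_names())  # e.g. ["UNSIGNED_BLOCK"]
    print(model.get_scores())        # confidences above the 0.94 cutoff
    model.draw_boxes().save("annotated.png")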
state_dicts/signature_blocks_v14.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8de7450d624842a805cb011db1a8bdd3359a817a4f7e5b4c8bcdaf9e340423b0
+ size 76042575