# Spaces: Sleeping (page-capture residue; not part of the program)
import fitz # PyMuPDF | |
from PIL import Image | |
import os | |
from itertools import islice | |
from collections import namedtuple | |
import pytesseract | |
import argparse | |
import imutils | |
import cv2 | |
import shutil | |
import os | |
import numpy as np | |
import gradio as gr | |
def align_images(image, template, maxFeatures=500, keepPercent=0.2, debug=False):
    """Warp ``image`` so that it lines up with ``template``.

    ORB keypoints are detected in both images, matched by brute-force Hamming
    distance, and the best ``keepPercent`` fraction of matches is used to
    estimate a homography with RANSAC. ``image`` is then warped with that
    homography onto a canvas the size of ``template``.

    Args:
        image: Colour image (as accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``) to be aligned.
        template: Reference colour image that defines the target geometry.
        maxFeatures: Upper bound on the number of ORB features per image.
        keepPercent: Fraction (0-1) of the best matches kept for the
            homography estimate.
        debug: When true, show the matched keypoints in an OpenCV window and
            block until a key is pressed. Needs a GUI-enabled OpenCV build.

    Returns:
        ``image`` warped into the template's frame, with the template's
        height and width.

    Raises:
        ValueError: If no features are found, fewer than four matches remain,
            or no homography can be estimated.
    """
    # ORB works on single-channel images.
    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create(maxFeatures)
    (kpsA, descsA) = orb.detectAndCompute(imageGray, None)
    (kpsB, descsB) = orb.detectAndCompute(templateGray, None)
    # detectAndCompute yields None descriptors when an image has no keypoints.
    if descsA is None or descsB is None:
        raise ValueError("No ORB features found in the image or the template")

    # Match the binary descriptors and rank them best-first.
    method = cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING
    matcher = cv2.DescriptorMatcher_create(method)
    matches = matcher.match(descsA, descsB, None)
    matches = sorted(matches, key=lambda m: m.distance)

    # Keep only the top matches.
    keep = int(len(matches) * keepPercent)
    matches = matches[:keep]
    # A homography has 8 degrees of freedom, so it needs >= 4 correspondences.
    if len(matches) < 4:
        raise ValueError(
            f"Need at least 4 keypoint matches to align, got {len(matches)}"
        )

    if debug:
        matchedVis = cv2.drawMatches(image, kpsA, template, kpsB, matches, None)
        matchedVis = imutils.resize(matchedVis, width=1000)
        # cv2.imshow replaces the notebook-only helper that was never imported.
        cv2.imshow("Matched Keypoints", matchedVis)
        cv2.waitKey(0)

    # (x, y) coordinates of each matched keypoint pair, row-aligned.
    ptsA = np.array([kpsA[m.queryIdx].pt for m in matches], dtype="float")
    ptsB = np.array([kpsB[m.trainIdx].pt for m in matches], dtype="float")

    # Homography mapping image coordinates onto template coordinates.
    (H, mask) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)
    if H is None:
        raise ValueError("Could not estimate a homography from the matches")

    (h, w) = template.shape[:2]
    aligned = cv2.warpPerspective(image, H, (w, h))
    return aligned
def cleanup_text(text):
    """Return ``text`` with non-ASCII characters removed and outer whitespace stripped."""
    ascii_only = "".join(ch for ch in text if ord(ch) < 128)
    return ascii_only.strip()
# (x, y, w, h) regions, in template coordinates, that hold each OCR field.
_PARCEL_NUMBER_BBOX = (385, 33, 225, 20)
_CROPS_BBOX = (75, 58, 180, 190)


def _render_pdf_pages(pdf_path):
    """Render every page of the PDF at ``pdf_path`` to a 3-channel BGR array."""
    pages = []
    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            pix = page.get_pixmap()
            channels = 4 if pix.alpha else 3
            rgb = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
                (pix.height, pix.width, channels)
            )
            # PyMuPDF delivers RGB(A); the OpenCV calls below assume BGR and
            # bilateralFilter does not accept a 4-channel image.
            conversion = cv2.COLOR_RGBA2BGR if pix.alpha else cv2.COLOR_RGB2BGR
            pages.append(cv2.cvtColor(rgb, conversion))
    return pages


def _load_template(path):
    """Read the alignment template at ``path``, failing loudly if it is missing."""
    template = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising.
    if template is None:
        raise FileNotFoundError(f"Alignment template not found or unreadable: {path}")
    return template


def _ocr_region(aligned, bbox):
    """Denoise ``aligned``, crop ``bbox`` (x, y, w, h) and return its OCR text."""
    (x, y, w, h) = bbox
    # Filter the whole page before cropping so edge pixels match the original.
    smoothed = cv2.bilateralFilter(aligned, 9, 75, 75)
    roi = smoothed[y:y + h, x:x + w]
    return pytesseract.image_to_string(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))


def detectarCatastro(pdf):
    """Extract the parcel number and crop list from a two-page cadastre PDF.

    Page 1 is aligned to ``alignImage1.png`` and page 2 to ``alignImage2.png``;
    a fixed region of each aligned page is then read with Tesseract.

    Args:
        pdf: Path to the PDF (``str`` or ``os.PathLike``), or an uploaded-file
            object exposing the path as ``.name``.

    Returns:
        A string of the form
        ``"Numero de la parcela: <text> | Cultivos: <text>"``.

    Raises:
        ValueError: If the input is not a ``.pdf`` path or the PDF has fewer
            than two pages.
        FileNotFoundError: If an alignment template image cannot be read.
    """
    # Accept a plain path as well as a file wrapper that carries the path.
    if isinstance(pdf, (str, os.PathLike)):
        pdf_path = os.fspath(pdf)
    else:
        pdf_path = getattr(pdf, "name", None)
    if not isinstance(pdf_path, str) or not pdf_path.lower().endswith(".pdf"):
        raise ValueError(f"Expected a path to a .pdf file, got {pdf!r}")

    pages = _render_pdf_pages(pdf_path)
    if len(pages) < 2:
        raise ValueError(
            f"Expected a PDF with at least 2 pages, got {len(pages)}"
        )

    # debug stays off: the debug view opens a blocking window, which is
    # unusable when this runs behind a web UI.
    parcel_page = align_images(pages[0], _load_template('alignImage1.png'), debug=False)
    crops_page = align_images(pages[1], _load_template('alignImage2.png'), debug=False)

    parcel_text = _ocr_region(parcel_page, _PARCEL_NUMBER_BBOX)
    crops_text = _ocr_region(crops_page, _CROPS_BBOX)
    return "Numero de la parcela: " + parcel_text + " | Cultivos: " + crops_text
# Input component: the PDF to analyse, pre-filled with the bundled example.
pdf = gr.File(value="ejemplo.pdf", label="Input PDF")

# OCR-engine selector. It is created here but is not among the Interface
# inputs below, so it is not passed to detectarCatastro.
method = gr.Radio(
    choices=["PaddleOCR", "EasyOCR", "KerasOCR"],
    value="PaddleOCR",
)

# Output component: the text returned by detectarCatastro.
output = gr.Textbox(label="Output")

# Wire the single PDF input to the detector and show its string result.
demo = gr.Interface(
    fn=detectarCatastro,
    inputs=[pdf],
    outputs=output,
    title="DetectorCatastro",
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
    article="""<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
<a href="mailto:[email protected]" target="_blank">[email protected]</a>
<p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com/" target="_blank">Pragnakalp Techlabs</a></p>""",
)

# Start the web server for the demo.
demo.launch()