import gradio as gr
import numpy as np
#import imutils
import cv2
import torch
import matplotlib.pyplot as plt
from pdf2image import convert_from_path, convert_from_bytes
from collections import namedtuple
import pytesseract
import argparse

def convert_pdf_to_png(file_name):
    """Render every page of a PDF to its own PNG file next to the PDF.

    Page ``k`` (1-based) is written to ``<file_name minus its last four
    characters>_<k>.png``.

    Args:
        file_name: Path of the PDF file to rasterise.
    """
    rendered_pages = convert_from_path(file_name)
    for page_number, rendered in enumerate(rendered_pages, start=1):
        # Dropping the last four characters removes a ".pdf"-style extension.
        target = f"{file_name[:-4]}_{page_number}.png"
        rendered.save(target, "PNG")


def alinearImagen(image, template, maxFeatures = 500, keepPercent = 0.2, debug = False):
    """Warp ``image`` so that it lines up with ``template``.

    ORB keypoints are detected in both images and matched by Hamming
    distance; the best matches are used to estimate a RANSAC homography,
    which is then applied to ``image``.

    Args:
        image: Colour image to align (it is converted with ``COLOR_BGR2GRAY``).
        template: Reference colour image; it defines the output size.
        maxFeatures: Maximum number of ORB features to detect per image.
        keepPercent: Fraction of the distance-sorted matches to keep.
        debug: When true, show the kept matches in an OpenCV window and block
            until a key is pressed.

    Returns:
        ``image`` warped into the template's frame, with the template's
        width and height.

    Raises:
        ValueError: If no descriptors are found in one of the images, fewer
            than four matches are kept, or no homography can be estimated.
    """
    # Convert both images to grayscale.
    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    # Use ORB to detect keypoints and extract binary descriptors.
    orb = cv2.ORB_create(maxFeatures)
    (kpsA, descsA) = orb.detectAndCompute(imageGray, None)
    (kpsB, descsB) = orb.detectAndCompute(templateGray, None)
    if descsA is None or descsB is None:
        raise ValueError("No ORB features found in the image or the template")

    # Match the descriptors of both images.
    method = cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING
    matcher = cv2.DescriptorMatcher_create(method)
    matches = matcher.match(descsA, descsB, None)

    # Sort by distance (smaller means more similar) and keep only the best.
    matches = sorted(matches, key=lambda match: match.distance)
    keep = int(len(matches) * keepPercent)
    matches = matches[:keep]

    # In debug mode, display both images with the kept matches drawn.
    if debug:
        matchedVis = cv2.drawMatches(image, kpsA, template, kpsB, matches, None)
        # Scale to 1000 px wide keeping the aspect ratio. This replaces
        # imutils.resize / cv2_imshow, which are not importable in this file.
        (visH, visW) = matchedVis.shape[:2]
        scale = 1000 / float(visW)
        matchedVis = cv2.resize(matchedVis, (1000, int(visH * scale)),
                                interpolation=cv2.INTER_AREA)
        cv2.imshow("Matched Keypoints", matchedVis)
        cv2.waitKey(0)

    # A homography needs at least four point correspondences.
    if len(matches) < 4:
        raise ValueError(
            "Not enough keypoint matches to align the image: "
            "{} kept, at least 4 required".format(len(matches)))

    # Coordinates of the matched keypoints; row i of ptsA maps to row i of ptsB.
    ptsA = np.array([kpsA[m.queryIdx].pt for m in matches], dtype="float")
    ptsB = np.array([kpsB[m.trainIdx].pt for m in matches], dtype="float")

    # Estimate the homography between the two sets of matched points.
    (H, mask) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)
    if H is None:
        raise ValueError("Could not estimate a homography from the matches")

    # Use the homography to warp the image into the template's frame.
    (h, w) = template.shape[:2]
    aligned = cv2.warpPerspective(image, H, (w, h))

    return aligned

def cleanup_text(text):
    """Drop every non-ASCII character from ``text`` and trim surrounding whitespace."""
    ascii_only = (char for char in text if ord(char) <= 127)
    return "".join(ascii_only).strip()

def aplicaFiltro(imagen):
    """Binarise and denoise one channel of a three-channel image.

    Args:
        imagen: Three-channel image; only the last channel returned by
            ``cv2.split`` is used.  NOTE(review): for images loaded with
            ``cv2.imread`` (BGR order) that is the red channel - confirm
            this is the intended one.

    Returns:
        Single-channel image after adaptive Gaussian thresholding and
        non-local-means denoising.
    """
    _, _, last_channel = cv2.split(imagen)
    binarised = cv2.adaptiveThreshold(
        last_channel,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,  # neighbourhood (block) size
        7,   # constant subtracted from the weighted mean
    )
    # The list argument selects the overload taking per-channel strengths h.
    return cv2.fastNlMeansDenoising(binarised, [50])

def recorte(imagen):
    """Crop ``imagen`` to the bounding box of its text-sized contours.

    The image is binarised with an inverted Otsu threshold, external contours
    are extracted, and only contours whose bounding rectangle satisfies
    ``5 <= w < 100`` and ``5 <= h < 30`` are kept.  The crop is the union of
    the kept rectangles.

    Args:
        imagen: Single-channel 8-bit image (required by Otsu thresholding).

    Returns:
        ``(crop, True)`` when at least one contour passes the size filter,
        otherwise ``(None, False)``.
    """
    _, thresh1 = cv2.threshold(imagen, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

    # findContours returns (contours, hierarchy) in OpenCV 2/4 and
    # (image, contours, hierarchy) in OpenCV 3, so the contour list is always
    # the second-to-last item.  This replaces imutils.grab_contours, which is
    # not importable in this file.
    cnts = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Keep only rectangles of plausible text size.
    boxes = []
    for c in cnts:
        (x, y, w, h) = cv2.boundingRect(c)
        if 5 <= w < 100 and 5 <= h < 30:
            boxes.append((x, y, w, h))

    if not boxes:
        return (None, False)

    # Union of the kept rectangles.  The far edges are max(x + w) and
    # max(y + h); combining max(x) with max(w) separately would over-extend
    # the crop beyond the detected content.
    x1 = min(x for (x, _, _, _) in boxes)
    y1 = min(y for (_, y, _, _) in boxes)
    x2 = max(x + w for (x, _, w, _) in boxes)
    y2 = max(y + h for (_, y, _, h) in boxes)

    return (imagen[y1:y2, x1:x2], True)

from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import requests

# TrOCR handwritten-text checkpoint.  The processor and the model are loaded
# once at import time and reused by OCR3_F for every region it recognises.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

def OCR3_F(nombreFichero):
    """Run handwritten-text OCR over the fixed fields of a two-page PDF form.

    The PDF is rasterised to PNG pages, each page is aligned against its
    template ("plantilla_0.png" / "plantilla_1.png"), the aligned pages are
    stacked vertically, and every configured field region that contains
    text-sized contours is passed through the TrOCR model.  The recognised
    text is drawn onto the stacked image.

    Args:
        nombreFichero: Uploaded file as delivered by the Gradio "file" input;
            only its ``name`` attribute (the path on disk) is used.

    Returns:
        The stacked aligned pages as an RGB image array, with a rectangle and
        the recognised text drawn for each field in which text was found.

    Raises:
        FileNotFoundError: If a rendered page or a template image is missing
            or cannot be decoded.
    """
    def leer_imagen(ruta):
        # cv2.imread signals failure by returning None instead of raising.
        imagen = cv2.imread(ruta)
        if imagen is None:
            raise FileNotFoundError(
                "Image is missing or unreadable: {}".format(ruta))
        return imagen

    # First convert the given file to PNG, one image per page.
    file_name = nombreFichero.name
    convert_pdf_to_png(file_name)

    # Read the two generated pages.  The paths are derived from the path
    # string (not from the upload object itself) with the same [:-4] stem
    # that convert_pdf_to_png uses.
    base_name = file_name[:-4]
    fich1 = leer_imagen(base_name + "_1.png")
    fich2 = leer_imagen(base_name + "_2.png")

    # Read the two templates.
    plantilla1 = leer_imagen("plantilla_0.png")
    plantilla2 = leer_imagen("plantilla_1.png")

    # Align each page with its corresponding template.
    fich1_alineado = alinearImagen(fich1, plantilla1)
    fich2_alineado = alinearImagen(fich2, plantilla2)

    # Stack the aligned pages vertically.
    fichero = cv2.vconcat([fich1_alineado, fich2_alineado])

    # Regions (x, y, w, h) of the stacked image on which OCR is applied.
    OCRLocation = namedtuple("OCRLocation", ["id", "bbox", "onlyNumber"])
    OCR_LOCATIONS = [
        OCRLocation("hojaCatastral", (1550, 55, 200, 32), True),
        OCRLocation("numeroParcela", (1550, 93, 200, 36), True),
        # NOTE(review): the next two regions share the same origin
        # (1550, 134) - one of them looks like a copy/paste slip; confirm
        # the intended coordinates.
        OCRLocation("idemPoligono", (1550, 134, 200, 35), True),
        OCRLocation("idemFotografia", (1550, 134, 200, 34), False),

        OCRLocation("terminoMunicipal", (1240, 254, 520, 38), False),
        OCRLocation("pago", (1300, 305, 460, 40), False),
        OCRLocation("partidoJudicial", (500, 312, 442, 38), False),

        OCRLocation("nombre", (386, 634, 604, 43), False),
        OCRLocation("pueblo", (1006, 632, 240, 44), False),

        OCRLocation("cultivos1", (212, 1373, 493, 43), False),
        OCRLocation("cultivos2", (212, 1426, 493, 42), False),
        OCRLocation("cultivos3", (212, 1480, 493, 41), False),
    ]

    fichFiltro = aplicaFiltro(fichero)

    parsingResults = []
    # Loop over the locations of the document we are going to OCR.
    for loc in OCR_LOCATIONS:
        # Extract the OCR ROI from the filtered, aligned image.
        (x, y, w, h) = loc.bbox
        roi = fichFiltro[y:y + h, x:x + w]

        # recorte() is used only to decide whether the region holds any
        # text-sized content.
        # NOTE(review): its tight crop is discarded and OCR runs on the full
        # ROI, as before - confirm whether the crop should be OCR'd instead.
        _, hay_texto = recorte(roi)
        if not hay_texto:
            continue

        # The filtered image is single-channel, so it has to be expanded
        # with GRAY2RGB; BGR2RGB rejects one-channel input.
        rgb = cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
        pixel_values = processor(rgb, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        # Record every non-empty line of the recognised text.
        for line in text.split("\n"):
            if not line:
                continue
            parsingResults.append((loc, line))

    # Group the recognised lines by location id, joining them with newlines.
    results = {}
    for (loc, line) in parsingResults:
        previo = results.get(loc.id)
        if previo is None:
            results[loc.id] = (line, loc._asdict())
        else:
            (existingText, locDict) = previo
            results[loc.id] = ("{}\n{}".format(existingText, line), locDict)

    # Draw each field's bounding box and recognised text on the image.
    for (text, locDict) in results.values():
        (x, y, w, h) = locDict["bbox"]
        # Draw the ASCII-only version of the text.
        clean = cleanup_text(text)

        cv2.rectangle(fichero, (x, y), (x + w, y + h), (0, 255, 0), 2)

        for (i, line) in enumerate(clean.split("\n")):
            startY = y + (i * 70) + 40
            cv2.putText(fichero, line, (x, startY),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.8, (0, 0, 255), 5)

    # OpenCV images are BGR while the Gradio image output interprets arrays
    # as RGB, so convert before returning.
    return cv2.cvtColor(fichero, cv2.COLOR_BGR2RGB)
    
# Build the Gradio interface and launch it.
demo = gr.Interface(
    fn=OCR3_F,
    inputs="file",
    outputs="image",
    examples=["fich3.pdf", "fich4.pdf"],
)
demo.launch()