import io
import logging
import os
import re
import shutil
from typing import Optional

import fitz
from fastapi import FastAPI, UploadFile, File, HTTPException
from google.cloud import vision
from PIL import Image

# from pdf2image import convert_from_path


class doc_processing:
    """Turn an uploaded document (PDF or image) into a fixed-size JPEG.

    The result is written to ``processed_images/<id_type>/<name>.jpeg``.

    Attributes are set directly from the constructor arguments:
        name: Base name (without extension) of the output file.
        id_type: Document category, e.g. ``"gst"``; selects the output
            sub-directory and, for PDFs, which page is rendered.
        doc_type: Extension of the input file; ``"pdf"`` in any letter case
            selects the PDF path, anything else is opened as an image.
        f_path: Path of the input file.
    """

    # Size (width, height) of the saved image. The original arithmetic
    # ``(1000 / w) * w`` always evaluated to ~1000 on each axis, so the target
    # is spelled out here. The aspect ratio is NOT preserved.
    TARGET_SIZE = (1000, 1000)

    def __init__(self, name, id_type, doc_type, f_path):
        self.name = name
        self.id_type = id_type
        self.doc_type = doc_type
        self.f_path = f_path

    def _page_index(self):
        """Return the zero-based PDF page to render (third page for GST)."""
        return 2 if self.id_type == "gst" else 0

    def _output_path(self):
        """Return the relative path the processed JPEG is written to."""
        return "processed_images/{}/{}.jpeg".format(self.id_type, self.name)

    def _save_scaled(self, image):
        """Resize *image* to ``TARGET_SIZE``, save it, and return the result dict."""
        logging.debug("original size: %sx%s", image.width, image.height)

        # Image.resize returns a new image; the original code discarded the
        # result and therefore saved the unscaled picture.
        scaled = image.resize(self.TARGET_SIZE, Image.Resampling.LANCZOS)
        logging.debug("new size: %sx%s", scaled.width, scaled.height)

        output_path = self._output_path()
        # Make sure the per-id_type directory exists before saving.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        scaled.save(output_path)
        return {"success": 200, "output_p": output_path}

    def pdf_to_image_scale(self):
        """Render one PDF page, scale it, and save it as JPEG.

        Returns:
            ``{"success": 200, "output_p": <output path>}``.

        NOTE(review): a GST PDF with fewer than three pages makes
        ``load_page`` fail; the error is propagated to the caller.
        """
        # The context manager closes the document even if rendering fails.
        with fitz.open(self.f_path) as pdf_document:
            page = pdf_document.load_page(self._page_index())
            pix = page.get_pixmap()  # Render page as a pixmap (image)
            # frombytes copies the samples, so the image outlives the document.
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        return self._save_scaled(image)

    def scale_img(self):
        """Open the input image, scale it, and save it as JPEG.

        Returns:
            ``{"success": 200, "output_p": <output path>}``.
        """
        logging.debug("path of file %s", self.f_path)
        # convert() yields an independent in-memory copy, so the source file
        # handle can be released right away.
        with Image.open(self.f_path) as source:
            image = source.convert("RGB")

        return self._save_scaled(image)

    def process(self):
        """Dispatch to the PDF or image pipeline based on ``doc_type``."""
        if str(self.doc_type).lower() == "pdf":
            return self.pdf_to_image_scale()
        return self.scale_img()


# NOTE: `vision` is already imported at the top of this file; this repeat is
# redundant but harmless.
from google.cloud import vision

# Module-level Vision API client, created once when this module is imported
# and used by run_google_vision().
# NOTE(review): constructing the client at import time presumably requires
# Google Cloud credentials to be configured in the environment — confirm.
vision_client = vision.ImageAnnotatorClient()


# Compiled once at import time instead of on every call.
# The PAN and Aadhaar patterns use boundaries rather than ^...$ anchors so
# they can be found inside multi-line OCR text, not only when the whole text
# is the number.
_DOCUMENT_PATTERNS = {
    "cin": re.compile(r"([LUu]{1}[0-9]{5}[A-Za-z]{2}[0-9]{4}[A-Za-z]{3}[0-9]{6})"),
    "msme": re.compile(r"(UDYAM-[A-Z]{2}-\d{2}-\d{7})"),
    "llpin": re.compile(r"([A-Z]{3}-[0-9]{4})"),
    "pan": re.compile(r"\b[A-Z]{3}[ABCFGHJLPT][A-Z]\d{4}[A-Z]\b"),
    # Twelve digits, optionally printed as 4-4-4 groups separated by single
    # spaces. The look-arounds reject slices of longer digit runs or of
    # longer space-separated groups (e.g. a 16-digit number in 4-4-4-4 form).
    "aadhaar": re.compile(r"(?<!\d)(?<!\d )\d{4} ?\d{4} ?\d{4}(?! ?\d)"),
}


def extract_document_number(ocr_text: str, id_type: str) -> Optional[str]:
    """
    Searches the OCR text for a document number of the requested type.

    Args:
        ocr_text: Text returned by OCR; may be empty or None.
        id_type: One of "cin", "msme", "llpin", "pan", "aadhaar", or
            "cin_llpin" (tries CIN first, then falls back to LLPIN).

    Returns:
        The first matching number, or None when the text is empty, the
        id_type is unknown, or nothing matches. Aadhaar numbers are returned
        as 12 contiguous digits even if the text contains them in spaced
        groups.
    """
    if not ocr_text:
        return None

    if id_type == "cin_llpin":
        candidates = ("cin", "llpin")
    elif id_type in _DOCUMENT_PATTERNS:
        candidates = (id_type,)
    else:
        return None

    for key in candidates:
        match = _DOCUMENT_PATTERNS[key].search(ocr_text)
        if match:
            number = match.group(0)
            if key == "aadhaar":
                # Normalise "1234 5678 9012" to "123456789012".
                number = number.replace(" ", "")
            return number

    return None


def run_google_vision(file_content: bytes) -> str:
    """
    Uses Google Vision OCR to extract text from binary file content.

    Args:
        file_content: Raw bytes of the image to send for text detection.

    Returns:
        The description of the first text annotation, or "" when the
        response carries no text annotations.
    """
    image = vision.Image(content=file_content)
    response = vision_client.text_detection(image=image)

    # A per-image failure is reported inside the response; log it so an empty
    # result can be told apart from an image without text. The return value
    # is unchanged.
    if response.error.message:
        logging.error("Google Vision OCR error: %s", response.error.message)

    texts = response.text_annotations
    if texts:
        # The first annotation contains the complete detected text
        return texts[0].description
    return ""


def extract_text_from_file(file_path: str) -> str:
    """
    Reads the file at file_path and runs Google Vision OCR on it.

    If the path ends with ".pdf" (any letter case), only the first page is
    rendered to a JPEG image and sent to OCR; any other file is sent as-is.

    Args:
        file_path: Path of the PDF or image file to read.

    Returns:
        The OCR text, or "" when the PDF could not be converted (the error
        is logged). Errors opening a non-PDF file are propagated.
    """
    if file_path.lower().endswith(".pdf"):
        try:
            # The context manager closes the PDF even if rendering fails;
            # previously the document was never closed.
            with fitz.open(file_path) as pdf_document:
                page = pdf_document.load_page(0)  # Load the first page
                pix = page.get_pixmap()  # Render page as an image

                # Convert pixmap to PIL Image (frombytes copies the samples)
                image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            # Convert image to bytes for OCR
            img_byte_arr = io.BytesIO()
            image.save(img_byte_arr, format="JPEG")
            file_content = img_byte_arr.getvalue()

        except Exception as e:
            # Best-effort: conversion failures yield an empty OCR result.
            logging.error(f"Error converting PDF to image: {e}")
            return ""
    else:
        with open(file_path, "rb") as f:
            file_content = f.read()

    return run_google_vision(file_content)


def extract_document_number_from_file(file_path: str, id_type: str) -> str:
    """
    OCRs the file at file_path and returns the document number found in it.

    The text comes from extract_text_from_file() and is searched by
    extract_document_number() using id_type; the result of that search is
    returned unchanged (None when nothing matched).
    """
    return extract_document_number(extract_text_from_file(file_path), id_type)


# files = {
#     "aadhar_file": "/home/javmulla/model_one/test_images_aadhar/test_two.jpg",
#     "pan_file": "/home/javmulla/model_one/test_images_pan/6ea33087.jpeg",
#     "cheque_file": "/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg",
#     "gst_file": "/home/javmulla/model_one/test_images_gst/0a52fbcb_page3_image_0.jpg"
# }


# files = {
#     "aadhar_file": "/home/javmulla/model_one/test_images_aadhar/test_two.jpg",
#     "pan_file": "/home/javmulla/model_one/test_images_pan/6ea33087.jpeg",
#     "cheque_file": "/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg",
#     "gst_file": "test_Images_folder/gst/e.pdf"
# }

# for key, value in files.items():
#     name = value.split("/")[-1].split(".")[0]
#     id_type = key.split("_")[0]
#     doc_type = value.split("/")[-1].split(".")[1]
#     f_path = value
#     preprocessing = doc_processing(name,id_type,doc_type,f_path)
#     response = preprocessing.process()
#     print("response",response)


# id_type, doc_type, f_path