"""Document pre-processing and OCR helpers.

Provides:

* ``doc_processing`` -- renders a PDF page or loads an image, resizes it to a
  fixed size and stores it as a JPEG under ``processed_images/<id_type>/``.
* ``extract_document_number*`` -- runs Google Vision OCR on a file and pulls a
  document number (CIN, LLPIN, MSME/Udyam, PAN, Aadhaar) out of the text.
"""

import io
import logging
import os
import re
import shutil
from typing import Optional

import fitz
from fastapi import FastAPI, UploadFile, File, HTTPException
from google.cloud import vision
from PIL import Image

# from pdf2image import convert_from_path

logger = logging.getLogger(__name__)

# Size (width, height) every processed image is resized to. The original code
# computed ``(1000 / w) * w`` and ``(1000 / h) * h``, i.e. 1000 on both axes,
# so the aspect ratio is intentionally not preserved here.
TARGET_SIZE = (1000, 1000)

# Zero-based page rendered for "gst" PDFs; every other id_type uses page 0.
GST_PAGE_INDEX = 2

# Compiled once at import time instead of on every call.
# ``pan`` and ``aadhaar`` use word boundaries rather than ``^``/``$`` so they
# can be found inside multi-line OCR text, not only when the whole text is
# exactly the number.
# NOTE(review): Aadhaar numbers printed in spaced groups ("1234 5678 9012")
# are still not matched by this pattern -- confirm whether that is needed.
_DOCUMENT_PATTERNS = {
    "cin": re.compile(r"([LUu]{1}[0-9]{5}[A-Za-z]{2}[0-9]{4}[A-Za-z]{3}[0-9]{6})"),
    "msme": re.compile(r"(UDYAM-[A-Z]{2}-\d{2}-\d{7})"),
    "llpin": re.compile(r"([A-Z]{3}-[0-9]{4})"),
    "pan": re.compile(r"\b[A-Z]{3}[PCHFTBALJGT][A-Z][\d]{4}[A-Z]\b"),
    "aadhaar": re.compile(r"\b\d{12}\b"),
}


class doc_processing:
    """Convert an uploaded document (PDF or image) into a resized JPEG.

    Attributes:
        name: Base name (without extension) used for the output file.
        id_type: Document category (e.g. ``"gst"``); selects the PDF page to
            render and the output sub-directory.
        doc_type: File extension / type; ``"pdf"`` (any case) selects the PDF
            path, anything else is treated as an image.
        f_path: Path of the input file.
    """

    def __init__(self, name, id_type, doc_type, f_path):
        self.name = name
        self.id_type = id_type
        self.doc_type = doc_type
        self.f_path = f_path
        # self.o_path = o_path

    def _resize_and_save(self, image):
        """Resize *image* to ``TARGET_SIZE`` and save it as a JPEG.

        Returns:
            ``{"success": 200, "output_p": <path of the written file>}``.
        """
        original_width, original_height = image.size
        logger.debug("original size: %sx%s", original_width, original_height)
        logger.debug("new size: %sx%s", *TARGET_SIZE)

        # Image.resize returns a new image; the result must be kept, otherwise
        # the unscaled image is what gets written to disk.
        resized = image.resize(TARGET_SIZE, Image.Resampling.LANCZOS)

        output_path = "processed_images/{}/{}.jpeg".format(self.id_type, self.name)
        # Make sure the per-id_type directory exists before saving.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        resized.save(output_path)
        return {"success": 200, "output_p": output_path}

    def pdf_to_image_scale(self):
        """Render one page of the PDF at ``f_path``, resize and save it.

        Page ``GST_PAGE_INDEX`` is used when ``id_type`` is ``"gst"``, page 0
        otherwise. PyMuPDF raises if the requested page does not exist.

        Returns:
            ``{"success": 200, "output_p": <path of the written file>}``.
        """
        page_num = GST_PAGE_INDEX if self.id_type == "gst" else 0

        # The context manager closes the document even if rendering fails.
        with fitz.open(self.f_path) as pdf_document:
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap()  # Render page as a pixmap (image)
            # Image.frombytes copies the pixel data, so the image stays valid
            # after the document is closed.
            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

        return self._resize_and_save(image)

    def scale_img(self):
        """Load the image at ``f_path`` as RGB, resize and save it.

        Returns:
            ``{"success": 200, "output_p": <path of the written file>}``.
        """
        logger.debug("path of file %s", self.f_path)
        # convert() produces an independent copy, so the source file handle
        # can be released right away.
        with Image.open(self.f_path) as source:
            image = source.convert("RGB")
        return self._resize_and_save(image)

    def process(self):
        """Dispatch to the PDF or image pipeline based on ``doc_type``.

        Returns:
            The result dict of ``pdf_to_image_scale`` or ``scale_img``.
        """
        # Case-insensitive so "pdf", "PDF" and "Pdf" all take the PDF path.
        if str(self.doc_type).lower() == "pdf":
            return self.pdf_to_image_scale()
        return self.scale_img()


# Created at import time; requires Google Cloud credentials to be configured.
vision_client = vision.ImageAnnotatorClient()


def extract_document_number(ocr_text: str, id_type: str) -> Optional[str]:
    """Search OCR text for a document number of the requested kind.

    Args:
        ocr_text: Text returned by OCR.
        id_type: ``"cin_llpin"`` (try CIN first, then LLPIN) or one of
            ``"cin"``, ``"msme"``, ``"llpin"``, ``"pan"``, ``"aadhaar"``.

    Returns:
        The first matching number, or ``None`` when the text is empty, the
        ``id_type`` is unknown, or nothing matches.
    """
    if not ocr_text:
        return None

    if id_type == "cin_llpin":
        candidates = ("cin", "llpin")
    elif id_type in _DOCUMENT_PATTERNS:
        candidates = (id_type,)
    else:
        return None

    for key in candidates:
        match = _DOCUMENT_PATTERNS[key].search(ocr_text)
        if match:
            return match.group(0)
    return None


def run_google_vision(file_content: bytes) -> str:
    """Run Google Vision text detection on raw image bytes.

    Args:
        file_content: Encoded image bytes (e.g. JPEG/PNG).

    Returns:
        The full detected text, or ``""`` when nothing was detected.
    """
    image = vision.Image(content=file_content)
    response = vision_client.text_detection(image=image)

    # Surface API-level failures in the log instead of silently returning "".
    if response.error.message:
        logger.error("Google Vision error: %s", response.error.message)

    texts = response.text_annotations
    if texts:
        # The first annotation contains the complete detected text
        return texts[0].description
    return ""


def extract_text_from_file(file_path: str) -> str:
    """OCR the file at *file_path* with Google Vision.

    A ``.pdf`` file has only its first page rendered to a JPEG before OCR;
    any other file is sent as-is.

    Returns:
        The detected text, or ``""`` if the PDF could not be converted or no
        text was found.
    """
    if file_path.lower().endswith(".pdf"):
        try:
            # Open the PDF with PyMuPDF; the context manager closes it again.
            with fitz.open(file_path) as pdf_document:
                page = pdf_document.load_page(0)  # Load the first page
                pix = page.get_pixmap()  # Render page as an image
                image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

            # Convert image to bytes for OCR
            img_byte_arr = io.BytesIO()
            image.save(img_byte_arr, format="JPEG")
            file_content = img_byte_arr.getvalue()
        except Exception as e:
            # Best effort: a broken PDF yields empty text rather than a crash.
            logger.error("Error converting PDF to image: %s", e)
            return ""
    else:
        with open(file_path, "rb") as f:
            file_content = f.read()

    return run_google_vision(file_content)


def extract_document_number_from_file(file_path: str, id_type: str) -> Optional[str]:
    """OCR the file at *file_path* and extract its document number.

    Args:
        file_path: Path of a PDF or image file.
        id_type: Kind of number to look for; see ``extract_document_number``.

    Returns:
        The matched number, or ``None`` when nothing was found.
    """
    ocr_text = extract_text_from_file(file_path)
    return extract_document_number(ocr_text, id_type)


# Example usage (manual test driver):
#
# files = {
#     "aadhar_file": "/home/javmulla/model_one/test_images_aadhar/test_two.jpg",
#     "pan_file": "/home/javmulla/model_one/test_images_pan/6ea33087.jpeg",
#     "cheque_file": "/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg",
#     "gst_file": "test_Images_folder/gst/e.pdf",
#     # alternative: "/home/javmulla/model_one/test_images_gst/0a52fbcb_page3_image_0.jpg"
# }
# for key, value in files.items():
#     name = value.split("/")[-1].split(".")[0]
#     id_type = key.split("_")[0]
#     doc_type = value.split("/")[-1].split(".")[1]
#     f_path = value
#     preprocessing = doc_processing(name, id_type, doc_type, f_path)
#     response = preprocessing.process()
#     print("response", response)