import io
import logging
import os
import re
import shutil
from typing import Optional

import fitz
from fastapi import FastAPI, UploadFile, File, HTTPException
from google.cloud import vision
from pdf2image import convert_from_path
from PIL import Image

logger = logging.getLogger(__name__)

# Module-level client, created at import time (requires Google Cloud
# credentials to be configured in the environment).
vision_client = vision.ImageAnnotatorClient()

# Compiled once at import. "pan" and "aadhaar" use boundary lookarounds rather
# than ^...$ anchors because they are searched for inside a full page of OCR
# text, not matched against an isolated value.
_DOCUMENT_PATTERNS = {
    "cin": re.compile(r"([LUu]{1}[0-9]{5}[A-Za-z]{2}[0-9]{4}[A-Za-z]{3}[0-9]{6})"),
    "msme": re.compile(r"(UDYAM-[A-Z]{2}-\d{2}-\d{7})"),
    "llpin": re.compile(r"([A-Z]{3}-[0-9]{4})"),
    "pan": re.compile(r"\b[A-Z]{3}[PCHFTBALJG][A-Z]\d{4}[A-Z]\b"),
    # 12 digits, optionally printed as three space-separated groups of four;
    # the lookarounds reject slices of longer digit runs (e.g. 16-digit IDs).
    "aadhaar": re.compile(r"(?<!\d)(?<!\d[ ])\d{4}[ ]?\d{4}[ ]?\d{4}(?![ ]?\d)"),
}


class doc_processing:
    """Normalise an uploaded document (PDF or image) into a fixed-size JPEG.

    Args:
        name: Base name (without extension) used for the output file.
        id_type: Document category (e.g. "gst", "pan"); selects the output
            sub-directory and, for PDFs, which page is rendered.
        doc_type: File extension of the input; "pdf" in any letter case
            selects the PDF path, anything else is opened as an image.
        f_path: Path to the input file.
    """

    # Output dimensions in pixels (width, height). The aspect ratio is not
    # preserved: every image is stretched to exactly this size.
    TARGET_SIZE = (1000, 1000)
    # Zero-based page rendered for "gst" PDFs; every other type uses page 0.
    GST_PAGE_INDEX = 2

    def __init__(self, name, id_type, doc_type, f_path):
        self.name = name
        self.id_type = id_type
        self.doc_type = doc_type
        self.f_path = f_path

    def _resize_and_save(self, image):
        """Resize *image* to TARGET_SIZE, save it as JPEG, return the result dict."""
        logger.debug("original size: %s", image.size)
        # Image.resize returns a new image; the result must be kept, otherwise
        # the unscaled original is what gets written to disk.
        resized = image.resize(self.TARGET_SIZE, Image.Resampling.LANCZOS)
        logger.debug("new size: %s", resized.size)

        output_path = "processed_images/{}/{}.jpeg".format(self.id_type, self.name)
        # Create the per-type directory on first use so save() cannot fail
        # merely because the directory does not exist yet.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        resized.save(output_path)
        return {"success": 200, "output_p": output_path}

    def pdf_to_image_scale(self):
        """Render one page of the PDF at ``f_path`` and save it as a scaled JPEG.

        Returns:
            ``{"success": 200, "output_p": <path of the written JPEG>}``.

        Raises:
            Whatever PyMuPDF raises when the file cannot be opened or the
            requested page does not exist (e.g. a "gst" PDF with fewer than
            three pages).
        """
        page_num = self.GST_PAGE_INDEX if self.id_type == "gst" else 0
        # The context manager closes the document even if rendering fails.
        with fitz.open(self.f_path) as pdf_document:
            pix = pdf_document.load_page(page_num).get_pixmap()
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        return self._resize_and_save(image)

    def scale_img(self):
        """Open the image at ``f_path``, convert to RGB and save it as a scaled JPEG.

        Returns:
            ``{"success": 200, "output_p": <path of the written JPEG>}``.
        """
        logger.debug("path of file %s", self.f_path)
        # convert() yields an independent in-memory copy, so the source file
        # handle can be released straight away.
        with Image.open(self.f_path) as source:
            image = source.convert("RGB")
        return self._resize_and_save(image)

    def process(self):
        """Dispatch to the PDF or image pipeline based on ``doc_type``."""
        if str(self.doc_type).lower() == "pdf":
            return self.pdf_to_image_scale()
        return self.scale_img()


def extract_document_number(ocr_text: str, id_type: str) -> Optional[str]:
    """Find a document number of the requested type inside OCR text.

    Args:
        ocr_text: Text returned by OCR; may span many lines.
        id_type: One of "cin", "msme", "llpin", "pan", "aadhaar", or the
            combined key "cin_llpin" (CIN is tried first, then LLPIN).

    Returns:
        The first matching number, or None when the text is empty, nothing
        matches, or ``id_type`` is not recognised. Aadhaar numbers are
        returned as 12 contiguous digits even when printed in spaced groups.
    """
    if not ocr_text:
        return None

    if id_type == "cin_llpin":
        candidates = ("cin", "llpin")
    elif id_type in _DOCUMENT_PATTERNS:
        candidates = (id_type,)
    else:
        return None

    for key in candidates:
        match = _DOCUMENT_PATTERNS[key].search(ocr_text)
        if match is None:
            continue
        number = match.group(0)
        if key == "aadhaar":
            # Strip the optional group separators so callers always receive
            # the bare 12-digit form.
            number = number.replace(" ", "")
        return number
    return None


def run_google_vision(file_content: bytes) -> str:
    """Run Google Vision text detection on raw image bytes.

    Returns:
        The full detected text (the description of the first annotation), or
        an empty string when nothing was detected or the API reported an error.
    """
    image = vision.Image(content=file_content)
    response = vision_client.text_detection(image=image)
    if response.error.message:
        # The API reports per-request failures in the response body instead of
        # raising; log it so an empty result is not mistaken for a blank page.
        logger.error("Google Vision error: %s", response.error.message)
    texts = response.text_annotations
    if texts:
        # The first annotation contains the complete detected text.
        return texts[0].description
    return ""


def extract_text_from_file(file_path: str) -> str:
    """OCR the file at *file_path* with Google Vision.

    For a ``.pdf`` path only the first page is rendered (to JPEG) and sent to
    OCR; any other file is sent as-is.

    Returns:
        The detected text, or an empty string when PDF rendering fails or no
        text is found.
    """
    if file_path.lower().endswith(".pdf"):
        try:
            # The context manager closes the document even if rendering fails.
            with fitz.open(file_path) as pdf_document:
                pix = pdf_document.load_page(0).get_pixmap()
                image = Image.frombytes(
                    "RGB", [pix.width, pix.height], pix.samples
                )
            buffer = io.BytesIO()
            image.save(buffer, format="JPEG")
            file_content = buffer.getvalue()
        except Exception as e:
            # Best effort by design: a broken PDF yields "no text", not a crash.
            logger.error(f"Error converting PDF to image: {e}")
            return ""
    else:
        with open(file_path, "rb") as f:
            file_content = f.read()
    return run_google_vision(file_content)


def extract_document_number_from_file(file_path: str, id_type: str) -> Optional[str]:
    """OCR the file at *file_path* and extract the number for *id_type*.

    Returns:
        The extracted document number, or None when none is found.
    """
    ocr_text = extract_text_from_file(file_path)
    return extract_document_number(ocr_text, id_type)


# Example usage (kept for reference):
#
# files = {
#     "aadhar_file": "/home/javmulla/model_one/test_images_aadhar/test_two.jpg",
#     "pan_file": "/home/javmulla/model_one/test_images_pan/6ea33087.jpeg",
#     "cheque_file": "/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg",
#     "gst_file": "test_Images_folder/gst/e.pdf",
# }
# for key, value in files.items():
#     name = value.split("/")[-1].split(".")[0]
#     id_type = key.split("_")[0]
#     doc_type = value.split("/")[-1].split(".")[1]
#     f_path = value
#     preprocessing = doc_processing(name, id_type, doc_type, f_path)
#     response = preprocessing.process()
#     print("response", response)