import logging
import os
import tempfile

import torch
from PIL import Image, ImageEnhance, ImageFilter
from transformers import AutoModel, AutoTokenizer

# Module-level logger named after this module; used by every OCRModel method.
logger = logging.getLogger(__name__)


class OCRModel:
    """Singleton wrapper around the GOT-OCR2.0 CPU model.

    Every call to ``OCRModel()`` returns the same instance; the tokenizer and
    model are loaded the first time an instance is requested.

    NOTE(review): first-time construction is not guarded by a lock; confirm
    that callers create the instance from a single thread.
    """

    # Hugging Face repository both the tokenizer and the model are loaded from.
    MODEL_NAME = 'RufusRubin777/GOT-OCR2_0_CPU'

    # Enhancement factors used by preprocess_image(), applied in this order.
    CONTRAST_FACTOR = 1.5
    SHARPNESS_FACTOR = 1.5
    BRIGHTNESS_FACTOR = 1.2

    _instance = None

    def __new__(cls):
        """Return the shared instance, loading the model on first use.

        Raises:
            Exception: Whatever ``initialize()`` raises. In that case no
                instance is cached, so the next call retries the load.
        """
        if cls._instance is None:
            instance = super().__new__(cls)
            # Publish the instance only after a successful load; otherwise a
            # failed initialize() would leave a half-built object cached and
            # every later OCRModel() would return it without model/tokenizer.
            instance.initialize()
            cls._instance = instance
        return cls._instance

    def initialize(self):
        """Load the tokenizer and model on the CPU and put the model in eval mode.

        Sets ``self.tokenizer``, ``self.model`` and ``self.device``.

        Raises:
            Exception: Any error raised while loading; it is logged with its
                traceback and re-raised unchanged.
        """
        try:
            logger.info("Initializing OCR model...")

            # NOTE(security): trust_remote_code=True executes Python code
            # shipped in the model repository; only use a repository you trust.
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.MODEL_NAME,
                trust_remote_code=True,
            )
            self.model = AutoModel.from_pretrained(
                self.MODEL_NAME,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                device_map='cpu',
                use_safetensors=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

            self.device = "cpu"
            self.model = self.model.eval().cpu()

            logger.info("Model initialization completed successfully")
        except Exception as e:
            logger.exception("Error initializing model: %s", e)
            raise

    def preprocess_image(self, image):
        """Preprocess the image to improve text recognition quality.

        The image is converted to RGB if needed, then contrast, sharpness and
        brightness are enhanced (in that order) and a smoothing filter is
        applied.

        Args:
            image: A PIL image.

        Returns:
            The preprocessed PIL image.

        Raises:
            Exception: Any error raised by PIL; it is logged and re-raised.
        """
        try:
            if image.mode != 'RGB':
                image = image.convert('RGB')

            enhancement_steps = (
                (ImageEnhance.Contrast, self.CONTRAST_FACTOR),
                (ImageEnhance.Sharpness, self.SHARPNESS_FACTOR),
                (ImageEnhance.Brightness, self.BRIGHTNESS_FACTOR),
            )
            for enhancer_cls, factor in enhancement_steps:
                image = enhancer_cls(image).enhance(factor)

            return image.filter(ImageFilter.SMOOTH)
        except Exception as e:
            logger.exception("Error in image preprocessing: %s", e)
            raise

    def _run_ocr(self, image_path):
        """Run the model on the image file at *image_path*; return stripped text."""
        try:
            result = self.model.chat(self.tokenizer, image_path, ocr_type='format')
            logger.info("Successfully extracted text: %s...", result[:100])
            return result.strip()
        except Exception as e:
            logger.error("Error in OCR processing: %s", e, exc_info=True)
            raise

    def process_image(self, image_stream):
        """Run OCR on an image read from a seekable binary stream.

        Args:
            image_stream: Seekable file-like object holding image data that
                PIL can open. It is rewound to the start before reading.

        Returns:
            The recognised text with surrounding whitespace stripped. On any
            failure the error is logged and the string
            ``"Error processing image: <message>"`` is returned instead of an
            exception being raised.
        """
        try:
            logger.info("Starting image processing")

            image_stream.seek(0)
            image = Image.open(image_stream).convert('RGB')
            processed_image = self.preprocess_image(image)

            # model.chat() is handed a file path, so the preprocessed image is
            # written to disk. A unique temp file (rather than a fixed name in
            # the working directory) keeps overlapping calls from overwriting
            # each other's input.
            fd, temp_image_path = tempfile.mkstemp(prefix="ocr_", suffix=".jpg")
            os.close(fd)  # PIL reopens the path itself when saving.
            try:
                processed_image.save(temp_image_path)
                return self._run_ocr(temp_image_path)
            finally:
                # Best-effort cleanup: a failure here must not discard a good
                # OCR result or mask the original error.
                try:
                    os.remove(temp_image_path)
                except FileNotFoundError:
                    pass
                except OSError:
                    logger.warning(
                        "Could not remove temporary image %s",
                        temp_image_path,
                        exc_info=True,
                    )
        except Exception as e:
            logger.exception("Error in image processing: %s", e)
            return f"Error processing image: {e}"