import re

import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Load the TrOCR processor and model once, at import time, so every call to
# extract_weight() reuses the same instances instead of reloading them.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Every character that is not a digit, a dot, "k" or "g" is discarded before
# the weight pattern is applied.
_NON_WEIGHT_CHARS = re.compile(r"[^\d\.kg]")

# A number (1-5 integer digits, optional 1-3 decimals) followed by a unit.
# The \s* is kept for safety, but cleaned text never contains whitespace.
_WEIGHT_PATTERN = re.compile(r'(\d{1,5}(?:\.\d{1,3})?)\s*(kg|g)')


def clean_ocr_text(text: str) -> str:
    """Normalize raw OCR output so that only weight-related characters remain.

    Commas are turned into dots (decimal comma -> decimal point), the text is
    lower-cased, and every character other than digits, ".", "k" and "g" is
    removed.

    Note that this also removes whitespace, so separate digit groups are
    joined ("52 . 2 kg" -> "52.2kg"), and that "k"/"g" letters coming from
    ordinary words survive the filter ("weight 5kg" -> "g5kg").

    Args:
        text: Raw text produced by the OCR model.

    Returns:
        The filtered, lower-cased string.
    """
    normalized = text.replace(",", ".").lower()
    return _NON_WEIGHT_CHARS.sub("", normalized)


def extract_weight(image) -> str:
    """Run TrOCR on *image* and extract a weight reading from the result.

    Args:
        image: The picture to read. It is handed to the module-level TrOCR
            processor; PIL images that are not in RGB mode are converted
            first, any other input type is passed through untouched.

    Returns:
        One of three strings:
        * ``"<value> <unit>"`` (unit is ``kg`` or ``g``) when a weight is found,
        * ``"No valid weight found"`` when the OCR text contains no match,
        * ``"Error: <message>"`` when anything raises during processing.
    """
    # The broad except is deliberate: callers always receive a string, never
    # an exception, whatever goes wrong during preprocessing or inference.
    try:
        # The TrOCR image processor expects 3-channel input; grayscale, RGBA
        # or palette PIL images would otherwise fail during preprocessing.
        if isinstance(image, Image.Image) and image.mode != "RGB":
            image = image.convert("RGB")

        # TrOCR inference
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
        raw_text = decoded[0].strip()
        print("OCR Raw Output:", raw_text)

        # Clean and normalize text
        cleaned_text = clean_ocr_text(raw_text)
        print("Cleaned OCR:", cleaned_text)

        # First "<number><unit>" occurrence wins (e.g. 52.2g, 98.7kg).
        match = _WEIGHT_PATTERN.search(cleaned_text)
        if match is None:
            return "No valid weight found"

        value, unit = match.groups()
        return f"{value} {unit}"
    except Exception as e:
        return f"Error: {e}"