"""Read a weight (number plus unit) from an image using a TrOCR model."""

import re

from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Load OCR model once, at import time, so repeated calls reuse the same weights.
# NOTE(review): the "stage1" checkpoint appears to be the pre-trained-only
# variant; a fine-tuned checkpoint may read labels/displays better -- confirm
# against the model card before changing it.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")

# A non-negative integer or decimal such as "500" or "1.25".
_NUMBER_PATTERN = r"\d+(?:\.\d+)?"
_NUMBER_RE = re.compile(_NUMBER_PATTERN)
# A number immediately followed by a unit token. Whitespace is stripped before
# matching, so "1.5 kg" is seen as "1.5kg". "kilo" also covers "kilos" and
# "kilogram(s)"; "g" also covers "gram(s)".
_NUMBER_WITH_UNIT_RE = re.compile(rf"({_NUMBER_PATTERN})(kg|kilo|g)")

_NO_WEIGHT_MESSAGE = "No valid weight detected"


def _parse_weight(text: str) -> str:
    """Turn raw OCR text into ``"<number> <unit>"`` or the no-weight message."""
    # Lower-case and drop all whitespace so "1.5 KG" and "1.5kg" look the same.
    cleaned = "".join(text.lower().split())

    # Preferred: a number with the unit attached to it. Tying the unit to the
    # number avoids picking up a stray "kg"/"g" from an unrelated word and
    # avoids reporting an earlier, unrelated number.
    attached = _NUMBER_WITH_UNIT_RE.search(cleaned)
    if attached:
        number, unit_token = attached.groups()
        unit = "kg" if unit_token.startswith("k") else "grams"
        return f"{number} {unit}"

    # Fallback (legacy behaviour): first number anywhere in the text, with the
    # unit guessed from the whole text and defaulting to grams when unclear.
    bare = _NUMBER_RE.search(cleaned)
    if bare is None:
        return _NO_WEIGHT_MESSAGE
    unit = "kg" if ("kg" in cleaned or "kilo" in cleaned) else "grams"
    return f"{bare.group(0)} {unit}"


def extract_weight(image: Image.Image) -> str:
    """Run OCR on *image* and return the weight it shows.

    Args:
        image: The picture to read; it is converted to RGB before OCR.

    Returns:
        A string of the form ``"<number> kg"`` or ``"<number> grams"``
        (grams is the default when no unit is recognised), or
        ``"No valid weight detected"`` when the OCR text contains no number.
    """
    # Ensure image is in RGB
    rgb_image = image.convert("RGB")

    # Process with Hugging Face OCR
    pixel_values = processor(images=rgb_image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    full_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return _parse_weight(full_text)