# Hugging Face Space (status: Running)
import re

from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Checkpoint shared by the processor and the model; keeping it in one place
# guarantees the two are always loaded from the same weights.
# NOTE(review): the "stage1" checkpoint appears to be the pre-trained-only
# TrOCR variant; confirm against the model card whether a fine-tuned
# checkpoint would read scale displays more reliably.
MODEL_NAME = "microsoft/trocr-base-stage1"

# Load model + processor once at import time so that every call to the OCR
# function reuses the same objects instead of reloading them per request.
processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
# Matches one decimal number: digits with an optional fractional part
# ("12", "12.5"), or a bare fraction such as ".5".
_WEIGHT_PATTERN = re.compile(r"\d+(?:\.\d+)?|\.\d+")

# Returned when the recognised text contains no usable number.
_NO_WEIGHT_MESSAGE = "No valid weight detected"


def _parse_weight(text: str) -> str:
    """Return the first decimal number in *text*, or the fallback message.

    Only the first well-formed number is kept, so unrelated digits and
    punctuation elsewhere in the text are not glued onto the reading:

        "Wt. 12.5 kg."  -> "12.5"
        "12.5 kg 2023"  -> "12.5"
        "..."           -> "No valid weight detected"
    """
    match = _WEIGHT_PATTERN.search(text)
    return match.group(0) if match else _NO_WEIGHT_MESSAGE


def extract_weight(image: Image.Image) -> str:
    """Run OCR on *image* and return the weight reading found in it.

    Args:
        image: The picture to read; it is converted to RGB before being
            handed to the processor.

    Returns:
        The first decimal number in the recognised text as a string, or
        "No valid weight detected" when the text contains no number.
    """
    # Convert image to RGB just in case (e.g. grayscale or RGBA input).
    rgb_image = image.convert("RGB")

    # Preprocess + generate prediction.
    pixel_values = processor(images=rgb_image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)

    # Decode output; a single image was passed, so take the first result.
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return _parse_weight(text)