Spaces:
Running
Running
File size: 1,919 Bytes
1dbcf19 8348064 1dbcf19 17d218a 65ef5f8 d07e9f7 1dbcf19 65ef5f8 25cb585 83f2f91 8348064 d07e9f7 83f2f91 25cb585 d07e9f7 8348064 1dbcf19 d07e9f7 1dbcf19 25cb585 83f2f91 1dbcf19 d07e9f7 1dbcf19 83f2f91 5607fce d07e9f7 5607fce 1dbcf19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image, ImageFilter
import torch
import re
# Load the pretrained TrOCR handwriting checkpoint once, at import time, so
# the processor and model objects are shared by every later OCR call.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
def clean_ocr_text(text):
    """Normalise raw OCR output into a compact ``<digits><unit>`` string.

    The text is lower-cased first so the look-alike substitutions treat
    upper- and lower-case letters identically, then:

    * ``,`` becomes ``.`` (decimal comma),
    * ``s`` becomes ``5`` and ``o`` becomes ``0`` (letter/digit confusions),
    * every character other than digits, ``.``, ``k`` and ``g`` is removed.

    Both the raw and the cleaned text are printed for debugging.

    Args:
        text: Raw string produced by the OCR model.

    Returns:
        The cleaned string, e.g. ``"52.25kg"`` for ``"52,25 KG"``.
    """
    print("[RAW OCR]", text)
    # Lower-case BEFORE substituting. Previously an upper-case "S" skipped the
    # "s" -> "5" rule and was then deleted by the filter below, silently
    # dropping a digit ("S2.5 kg" became "2.5kg").
    substitutions = str.maketrans({",": ".", "s": "5", "o": "0"})
    text = text.lower().translate(substitutions)
    text = re.sub(r"[^\d.kg]", "", text)  # Keep digits, dots, and kg
    print("[CLEANED OCR]", text)
    return text
def restore_decimal(text):
    """Re-insert a decimal point after the second digit of a digit run.

    Only strings made up of exactly five or exactly four digits are changed
    (``"5225"`` -> ``"52.25"``, ``"52250"`` -> ``"52.250"``); any other input
    is returned as-is.
    """
    digit_run_patterns = (r"\d{5}", r"\d{4}")
    if not any(re.fullmatch(pattern, text) for pattern in digit_run_patterns):
        return text
    whole_part, fraction_part = text[:2], text[2:]
    return f"{whole_part}.{fraction_part}"
def _parse_weight_text(cleaned, raw_text):
    """Return ``"<value> <unit>"`` parsed from cleaned OCR text, or ``None``."""
    # Preferred form: an explicit decimal value followed by a unit,
    # e.g. 52.25 kg or 75.0 g.
    decimal_match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)", cleaned)
    if decimal_match:
        return f"{decimal_match.group(1)} {decimal_match.group(2)}"

    # Fallback: a bare 4-5 digit run whose decimal point the OCR dropped.
    digits_match = re.search(r"(\d{4,5})", cleaned)
    if digits_match:
        value = restore_decimal(digits_match.group(1))
        # The unit is taken from the raw text; "g" is the default when no
        # "kg" hint is present.
        unit = "kg" if "kg" in raw_text.lower() else "g"
        return f"{value} {unit}"

    # Last resort: a whole-number reading with an explicit unit ("75kg").
    # The look-behind rejects the fractional tail of a malformed decimal
    # (".5kg"), which would otherwise be misread as a whole number.
    integer_match = re.search(r"(?<![\d.])(\d{1,3})(kg|g)", cleaned)
    if integer_match:
        return f"{integer_match.group(1)} {integer_match.group(2)}"

    return None


def extract_weight(image):
    """Read a weight value from an image of handwritten/printed text.

    The image is converted to RGB, upscaled 2x with bicubic resampling and
    sharpened, then passed through the module-level TrOCR ``processor`` and
    ``model``. The decoded text is cleaned with ``clean_ocr_text`` and parsed
    for a weight.

    Args:
        image: A PIL image (any mode), or ``None``.

    Returns:
        A string, always. One of:

        * ``"<value> <unit>"`` with unit ``kg`` or ``g`` on success,
        * ``"No valid weight found | OCR: <cleaned text>"`` when nothing
          weight-like was recognised,
        * ``"Error: <message>"`` when no image was supplied or any step
          raised an exception.
    """
    if image is None:
        return "Error: no image provided"
    try:
        # Normalise to 3-channel RGB first: ImageFilter.SHARPEN refuses
        # palette ("P") images, and the TrOCR usage example in the Hugging
        # Face docs feeds RGB input to the processor.
        image = image.convert("RGB")
        image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
        image = image.filter(ImageFilter.SHARPEN)

        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
        raw_text = decoded[0].strip()

        cleaned = clean_ocr_text(raw_text)
        weight = _parse_weight_text(cleaned, raw_text)
        if weight is not None:
            return weight
        return f"No valid weight found | OCR: {cleaned}"
    except Exception as e:
        # Deliberate catch-all: callers receive a displayable string instead
        # of an exception.
        return f"Error: {str(e)}"
|