Sanjayraju30 committed on
Commit
546e454
·
verified ·
1 Parent(s): abbbfd5

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +16 -9
ocr_engine.py CHANGED
@@ -10,7 +10,7 @@ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwrit
10
  def clean_ocr_text(text):
11
  # Fix common OCR misreads
12
  text = text.replace(",", ".").replace("s", "5").replace("o", "0").replace("O", "0")
13
- return re.sub(r"[^\d.kg]", "", text.lower()) # keep digits, dot, k, g
14
 
15
  def restore_decimal(text):
16
  if re.fullmatch(r"\d{5}", text):
@@ -25,11 +25,11 @@ def extract_unit_from_text(raw_text):
25
  return "kg"
26
  elif "g" in raw_text:
27
  return "g"
28
- return "g" # fallback if unit not found
29
 
30
  def extract_weight(image):
31
  try:
32
- # Enhance image
33
  image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
34
  image = image.filter(ImageFilter.SHARPEN)
35
 
@@ -40,17 +40,24 @@ def extract_weight(image):
40
 
41
  cleaned = clean_ocr_text(raw_text)
42
 
43
- # Try direct match (e.g., 52.25 kg or 250.5g)
44
  match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)?", cleaned)
45
  if match:
46
  return f"{match.group(1)} {match.group(2) or ''}".strip(), raw_text
47
 
48
- # Fallback if no decimal found: convert big number like 53255 to 52.255
49
- fallback_match = re.search(r"\d{4,5}", cleaned)
50
- if fallback_match:
51
- decimal_fixed = restore_decimal(fallback_match.group())
52
- return decimal_fixed, raw_text
 
 
 
 
 
 
53
 
54
  return "Error: No valid weight found", raw_text
 
55
  except Exception as e:
56
  return f"Error: {str(e)}", ""
 
10
  def clean_ocr_text(text):
11
  # Fix common OCR misreads
12
  text = text.replace(",", ".").replace("s", "5").replace("o", "0").replace("O", "0")
13
+ return re.sub(r"[^\d.kg]", "", text.lower())
14
 
15
  def restore_decimal(text):
16
  if re.fullmatch(r"\d{5}", text):
 
25
  return "kg"
26
  elif "g" in raw_text:
27
  return "g"
28
+ return "g" # fallback if no unit
29
 
30
  def extract_weight(image):
31
  try:
32
+ # Resize & sharpen image
33
  image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
34
  image = image.filter(ImageFilter.SHARPEN)
35
 
 
40
 
41
  cleaned = clean_ocr_text(raw_text)
42
 
43
+ # Case 1: Match decimal with unit
44
  match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)?", cleaned)
45
  if match:
46
  return f"{match.group(1)} {match.group(2) or ''}".strip(), raw_text
47
 
48
+ # Case 2: Large number fallback like 53255 to 52.255
49
+ match = re.search(r"\d{4,5}", cleaned)
50
+ if match:
51
+ decimal_fixed = restore_decimal(match.group())
52
+ unit = extract_unit_from_text(raw_text)
53
+ return f"{decimal_fixed} {unit}", raw_text
54
+
55
+ # Final fallback: plain number
56
+ match = re.search(r"\d+", cleaned)
57
+ if match:
58
+ return f"{match.group()} g", raw_text
59
 
60
  return "Error: No valid weight found", raw_text
61
+
62
  except Exception as e:
63
  return f"Error: {str(e)}", ""