Sanjayraju30 committed on
Commit
9c1cffc
·
verified ·
1 Parent(s): 38dd73a

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +18 -6
ocr_engine.py CHANGED
@@ -3,47 +3,59 @@ import numpy as np
3
  import cv2
4
  import re
5
 
 
6
  reader = easyocr.Reader(['en'], gpu=False)
7
 
8
  def extract_weight_from_image(pil_img):
9
  try:
10
  img = np.array(pil_img)
11
 
12
- # No enhancement, just resize
13
  max_dim = 1000
14
  height, width = img.shape[:2]
15
  if max(height, width) > max_dim:
16
  scale = max_dim / max(height, width)
17
  img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
18
 
 
19
  results = reader.readtext(img)
20
  print("DEBUG OCR RESULTS:", results)
21
 
22
- if not results:
23
- return "No text detected", 0.0, "OCR returned empty list"
24
-
25
  raw_texts = []
26
  weight_candidates = []
27
 
28
  for _, text, conf in results:
29
  original = text
30
  cleaned = text.lower().strip()
 
 
31
  cleaned = cleaned.replace(",", ".")
32
  cleaned = cleaned.replace("o", "0").replace("O", "0")
33
  cleaned = cleaned.replace("s", "5").replace("S", "5")
34
  cleaned = cleaned.replace("g", "9").replace("G", "6")
35
  cleaned = cleaned.replace("kg", "").replace("kgs", "")
36
- cleaned = re.sub(r"[^\d\.]", "", cleaned)
37
 
38
  raw_texts.append(f"{original} → {cleaned} (conf: {round(conf, 2)})")
39
 
40
- if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
 
41
  weight_candidates.append((cleaned, conf))
42
 
43
  if not weight_candidates:
44
  return "Not detected", 0.0, "\n".join(raw_texts)
45
 
 
46
  best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
 
 
 
 
 
 
 
 
 
47
  return best_weight, round(best_conf * 100, 2), "\n".join(raw_texts)
48
 
49
  except Exception as e:
 
3
  import cv2
4
  import re
5
 
6
+ # Load OCR engine
7
  reader = easyocr.Reader(['en'], gpu=False)
8
 
9
  def extract_weight_from_image(pil_img):
10
  try:
11
  img = np.array(pil_img)
12
 
13
+ # Resize large image if needed
14
  max_dim = 1000
15
  height, width = img.shape[:2]
16
  if max(height, width) > max_dim:
17
  scale = max_dim / max(height, width)
18
  img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
19
 
20
+ # OCR without heavy preprocessing
21
  results = reader.readtext(img)
22
  print("DEBUG OCR RESULTS:", results)
23
 
 
 
 
24
  raw_texts = []
25
  weight_candidates = []
26
 
27
  for _, text, conf in results:
28
  original = text
29
  cleaned = text.lower().strip()
30
+
31
+ # Fix common OCR mistakes
32
  cleaned = cleaned.replace(",", ".")
33
  cleaned = cleaned.replace("o", "0").replace("O", "0")
34
  cleaned = cleaned.replace("s", "5").replace("S", "5")
35
  cleaned = cleaned.replace("g", "9").replace("G", "6")
36
  cleaned = cleaned.replace("kg", "").replace("kgs", "")
37
+ cleaned = re.sub(r"[^0-9\.]", "", cleaned)
38
 
39
  raw_texts.append(f"{original} → {cleaned} (conf: {round(conf, 2)})")
40
 
41
+ # Match flexible weight formats like 75.02, 97.2, 102.34
42
+ if cleaned.count(".") <= 1 and re.match(r"^\d{2,4}(\.\d{1,3})?$", cleaned):
43
  weight_candidates.append((cleaned, conf))
44
 
45
  if not weight_candidates:
46
  return "Not detected", 0.0, "\n".join(raw_texts)
47
 
48
+ # Get best weight
49
  best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
50
+
51
+ # Strip unnecessary leading zeros
52
+ if "." in best_weight:
53
+ int_part, dec_part = best_weight.split(".")
54
+ int_part = int_part.lstrip("0") or "0"
55
+ best_weight = f"{int_part}.{dec_part}"
56
+ else:
57
+ best_weight = best_weight.lstrip("0") or "0"
58
+
59
  return best_weight, round(best_conf * 100, 2), "\n".join(raw_texts)
60
 
61
  except Exception as e: