Sanjayraju30 committed on
Commit
8ccdb60
·
verified ·
1 Parent(s): d736dc4

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +26 -22
ocr_engine.py CHANGED
@@ -9,50 +9,54 @@ def extract_weight_from_image(pil_img):
9
  try:
10
  img = np.array(pil_img)
11
 
12
- # Resize large images
13
  max_dim = 1000
14
  height, width = img.shape[:2]
15
  if max(height, width) > max_dim:
16
  scale = max_dim / max(height, width)
17
  img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
18
 
19
- # Run OCR
20
  results = reader.readtext(img)
21
 
22
- weight_candidates = []
23
- fallback_weight = None
24
- fallback_conf = 0.0
25
 
26
  for item in results:
27
- if len(item) != 2:
28
  continue
29
- text_data = item[1]
30
- if not isinstance(text_data, tuple) or len(text_data) != 2:
31
- continue
32
-
33
- text, conf = text_data
34
  cleaned = text.lower().strip()
 
 
35
  cleaned = cleaned.replace(",", ".")
36
  cleaned = cleaned.replace("o", "0").replace("O", "0")
37
  cleaned = cleaned.replace("s", "5").replace("S", "5")
38
  cleaned = cleaned.replace("g", "9").replace("G", "6")
39
  cleaned = cleaned.replace("kg", "").replace("kgs", "")
40
- cleaned = re.sub(r"[^0-9\.]", "", cleaned)
41
 
42
- if cleaned and cleaned.replace(".", "").isdigit() and not fallback_weight:
43
- fallback_weight = cleaned
44
- fallback_conf = conf
 
 
45
 
46
- if cleaned.count(".") <= 1 and re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
47
- weight_candidates.append((cleaned, conf))
 
 
 
 
 
 
 
 
 
48
 
49
- if weight_candidates:
50
- best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
51
- elif fallback_weight:
52
- best_weight, best_conf = fallback_weight, fallback_conf
53
- else:
54
  return "Not detected", 0.0
55
 
 
56
  if "." in best_weight:
57
  int_part, dec_part = best_weight.split(".")
58
  int_part = int_part.lstrip("0") or "0"
 
9
  try:
10
  img = np.array(pil_img)
11
 
12
+ # Resize if too large
13
  max_dim = 1000
14
  height, width = img.shape[:2]
15
  if max(height, width) > max_dim:
16
  scale = max_dim / max(height, width)
17
  img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
18
 
 
19
  results = reader.readtext(img)
20
 
21
+ best_weight = None
22
+ best_conf = 0.0
 
23
 
24
  for item in results:
25
+ if len(item) != 2 or not isinstance(item[1], tuple):
26
  continue
27
+ text, conf = item[1]
 
 
 
 
28
  cleaned = text.lower().strip()
29
+
30
+ # Fix misread characters
31
  cleaned = cleaned.replace(",", ".")
32
  cleaned = cleaned.replace("o", "0").replace("O", "0")
33
  cleaned = cleaned.replace("s", "5").replace("S", "5")
34
  cleaned = cleaned.replace("g", "9").replace("G", "6")
35
  cleaned = cleaned.replace("kg", "").replace("kgs", "")
36
+ cleaned = re.sub(r"[^\d\.]", "", cleaned)
37
 
38
+ # Check for number format like 75.5, 102.3
39
+ if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
40
+ if conf > best_conf:
41
+ best_weight = cleaned
42
+ best_conf = conf
43
 
44
+ # If nothing matched, return first numeric string found
45
+ if not best_weight:
46
+ for item in results:
47
+ if len(item) != 2 or not isinstance(item[1], tuple):
48
+ continue
49
+ text, conf = item[1]
50
+ fallback = re.sub(r"[^\d\.]", "", text)
51
+ if fallback and fallback.replace(".", "").isdigit():
52
+ best_weight = fallback
53
+ best_conf = conf
54
+ break
55
 
56
+ if not best_weight:
 
 
 
 
57
  return "Not detected", 0.0
58
 
59
+ # Strip leading zeros
60
  if "." in best_weight:
61
  int_part, dec_part = best_weight.split(".")
62
  int_part = int_part.lstrip("0") or "0"