Sanjayraju30 committed on
Commit
ddf8948
·
verified ·
1 Parent(s): 89fe87f

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +45 -35
ocr_engine.py CHANGED
@@ -5,54 +5,64 @@ import re
5
 
6
  # Module-level EasyOCR reader (English only, GPU disabled) used by
  # extract_weight_from_image for every OCR call.
  reader = easyocr.Reader(['en'], gpu=False)
7
 
8
def enhance_image(img, max_dim=1000):
    """Prepare an image array for OCR and return a single-channel result.

    Steps, in order: downscale so the longer side is at most ``max_dim``
    pixels, convert to grayscale, denoise, sharpen, and equalise contrast
    with CLAHE.

    Args:
        img: Image as a NumPy array. Either 2-D (already grayscale) or 3-D
            with colour channels last in RGB order.
            NOTE(review): presumably 8-bit data coming from ``np.array`` of a
            PIL image -- confirm against the caller.
        max_dim: Largest allowed side length in pixels before downscaling.

    Returns:
        The enhanced grayscale image as a 2-D array.
    """
    # Resize large images down to avoid OCR failure.
    height, width = img.shape[:2]
    longest_side = max(height, width)
    if longest_side > max_dim:
        scale = max_dim / longest_side
        img = cv2.resize(
            img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA
        )

    # A 2-D array is already grayscale; cvtColor with COLOR_RGB2GRAY rejects
    # single-channel input, so only convert when colour channels are present.
    if img.ndim == 2:
        gray = img
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Denoise (filter strength h=15).
    gray = cv2.fastNlMeansDenoising(gray, h=15)

    # Sharpen with a 3x3 kernel: centre weight 5, direct neighbours -1.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharp = cv2.filter2D(gray, -1, kernel)

    # Local contrast equalisation on 8x8 tiles.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return clahe.apply(sharp)
31
-
32
  def extract_weight_from_image(pil_img):
33
  try:
34
  img = np.array(pil_img)
35
- enhanced = enhance_image(img)
36
 
37
- results = reader.readtext(enhanced)
 
 
 
 
 
 
 
 
38
  print("DEBUG OCR RESULTS:", results)
39
 
40
- ocr_texts = [text for _, text, _ in results]
41
  weight_candidates = []
42
 
 
 
 
43
  for _, text, conf in results:
44
- cleaned = text.lower().replace("kg", "").replace("kgs", "")
45
- cleaned = cleaned.replace("o", "0").replace("s", "5").replace("g", "9")
46
- cleaned = re.sub(r"[^\d\.]", "", cleaned)
 
 
 
 
 
 
47
 
48
- if re.fullmatch(r"\d{2,4}(\.\d{1,2})?", cleaned):
 
 
 
 
 
 
 
49
  weight_candidates.append((cleaned, conf))
50
 
51
- if not weight_candidates:
52
- return "Not detected", 0.0, "\n".join(ocr_texts)
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
55
- return best_weight, round(best_conf * 100, 2), "\n".join(ocr_texts)
56
 
57
  except Exception as e:
58
  return f"Error: {str(e)}", 0.0, "OCR failed"
 
5
 
6
  # Module-level EasyOCR reader (English only, GPU disabled) used by
  # extract_weight_from_image for every OCR call.
  reader = easyocr.Reader(['en'], gpu=False)
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def extract_weight_from_image(pil_img):
9
  try:
10
  img = np.array(pil_img)
 
11
 
12
+ # Resize if image is too big
13
+ max_dim = 1000
14
+ height, width = img.shape[:2]
15
+ if max(height, width) > max_dim:
16
+ scale = max_dim / max(height, width)
17
+ img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
18
+
19
+ # OCR
20
+ results = reader.readtext(img)
21
  print("DEBUG OCR RESULTS:", results)
22
 
23
+ raw_texts = []
24
  weight_candidates = []
25
 
26
+ fallback_weight = None
27
+ fallback_conf = 0.0
28
+
29
  for _, text, conf in results:
30
+ original = text
31
+ cleaned = text.lower().strip()
32
+
33
+ cleaned = cleaned.replace(",", ".")
34
+ cleaned = cleaned.replace("o", "0").replace("O", "0")
35
+ cleaned = cleaned.replace("s", "5").replace("S", "5")
36
+ cleaned = cleaned.replace("g", "9").replace("G", "6")
37
+ cleaned = cleaned.replace("kg", "").replace("kgs", "")
38
+ cleaned = re.sub(r"[^0-9\.]", "", cleaned)
39
 
40
+ raw_texts.append(f"{original} → {cleaned} (conf: {round(conf, 2)})")
41
+
42
+ # Save fallback if any number
43
+ if cleaned and cleaned.replace(".", "").isdigit() and not fallback_weight:
44
+ fallback_weight = cleaned
45
+ fallback_conf = conf
46
+
47
+ if cleaned.count(".") <= 1 and re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
48
  weight_candidates.append((cleaned, conf))
49
 
50
+ if weight_candidates:
51
+ best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
52
+ elif fallback_weight:
53
+ best_weight, best_conf = fallback_weight, fallback_conf
54
+ else:
55
+ return "Not detected", 0.0, "\n".join(raw_texts)
56
+
57
+ # Clean up leading zeros
58
+ if "." in best_weight:
59
+ int_part, dec_part = best_weight.split(".")
60
+ int_part = int_part.lstrip("0") or "0"
61
+ best_weight = f"{int_part}.{dec_part}"
62
+ else:
63
+ best_weight = best_weight.lstrip("0") or "0"
64
 
65
+ return best_weight, round(best_conf * 100, 2), "\n".join(raw_texts)
 
66
 
67
  except Exception as e:
68
  return f"Error: {str(e)}", 0.0, "OCR failed"