Sanjayraju30 committed on
Commit
38dd73a
·
verified ·
1 Parent(s): 8211ee7

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +12 -32
ocr_engine.py CHANGED
@@ -5,36 +5,24 @@ import re
5
 
6
  reader = easyocr.Reader(['en'], gpu=False)
7
 
8
- def enhance_image(img):
9
- max_dim = 1000
10
- height, width = img.shape[:2]
11
- if max(height, width) > max_dim:
12
- scale = max_dim / max(height, width)
13
- img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
14
-
15
- gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
16
- gray = cv2.fastNlMeansDenoising(gray, h=15)
17
-
18
- kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
19
- sharp = cv2.filter2D(gray, -1, kernel)
20
-
21
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
22
- enhanced = clahe.apply(sharp)
23
-
24
- return enhanced
25
-
26
  def extract_weight_from_image(pil_img):
27
  try:
28
  img = np.array(pil_img)
29
- enhanced = enhance_image(img)
30
 
31
- results = reader.readtext(enhanced)
 
 
 
 
 
 
 
32
  print("DEBUG OCR RESULTS:", results)
33
 
34
  if not results:
35
  return "No text detected", 0.0, "OCR returned empty list"
36
 
37
- all_texts = []
38
  weight_candidates = []
39
 
40
  for _, text, conf in results:
@@ -47,24 +35,16 @@ def extract_weight_from_image(pil_img):
47
  cleaned = cleaned.replace("kg", "").replace("kgs", "")
48
  cleaned = re.sub(r"[^\d\.]", "", cleaned)
49
 
50
- all_texts.append(f"{original} → {cleaned} (conf: {round(conf, 2)})")
51
 
52
  if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
53
  weight_candidates.append((cleaned, conf))
54
 
55
  if not weight_candidates:
56
- return "Not detected", 0.0, "\n".join(all_texts)
57
 
58
  best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
59
-
60
- if "." in best_weight:
61
- parts = best_weight.split(".")
62
- parts[0] = parts[0].lstrip("0") or "0"
63
- best_weight = ".".join(parts)
64
- else:
65
- best_weight = best_weight.lstrip("0") or "0"
66
-
67
- return best_weight, round(best_conf * 100, 2), "\n".join(all_texts)
68
 
69
  except Exception as e:
70
  return f"Error: {str(e)}", 0.0, "OCR failed"
 
5
 
6
  reader = easyocr.Reader(['en'], gpu=False)
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def extract_weight_from_image(pil_img):
9
  try:
10
  img = np.array(pil_img)
 
11
 
12
+ # No enhancement, just resize
13
+ max_dim = 1000
14
+ height, width = img.shape[:2]
15
+ if max(height, width) > max_dim:
16
+ scale = max_dim / max(height, width)
17
+ img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
18
+
19
+ results = reader.readtext(img)
20
  print("DEBUG OCR RESULTS:", results)
21
 
22
  if not results:
23
  return "No text detected", 0.0, "OCR returned empty list"
24
 
25
+ raw_texts = []
26
  weight_candidates = []
27
 
28
  for _, text, conf in results:
 
35
  cleaned = cleaned.replace("kg", "").replace("kgs", "")
36
  cleaned = re.sub(r"[^\d\.]", "", cleaned)
37
 
38
+ raw_texts.append(f"{original} → {cleaned} (conf: {round(conf, 2)})")
39
 
40
  if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
41
  weight_candidates.append((cleaned, conf))
42
 
43
  if not weight_candidates:
44
+ return "Not detected", 0.0, "\n".join(raw_texts)
45
 
46
  best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
47
+ return best_weight, round(best_conf * 100, 2), "\n".join(raw_texts)
 
 
 
 
 
 
 
 
48
 
49
  except Exception as e:
50
  return f"Error: {str(e)}", 0.0, "OCR failed"