Sanjayraju30 committed on
Commit
477d4fe
·
verified ·
1 Parent(s): 2d8883b

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +16 -26
ocr_engine.py CHANGED
@@ -1,43 +1,33 @@
1
- import easyocr
2
  import numpy as np
3
- import cv2
4
  import re
5
 
6
- reader = easyocr.Reader(['en'], gpu=False)
7
 
8
  def extract_weight_from_image(pil_img):
9
  try:
10
  img = np.array(pil_img)
 
11
 
12
- # Resize and grayscale
13
- img = cv2.resize(img, None, fx=3.5, fy=3.5, interpolation=cv2.INTER_LINEAR)
14
- gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
15
-
16
- # Denoise and threshold
17
- gray = cv2.bilateralFilter(gray, 11, 17, 17)
18
- thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
19
- cv2.THRESH_BINARY_INV, 11, 2)
20
-
21
- results = reader.readtext(thresh)
22
- all_texts = [text for _, text, _ in results]
23
-
24
  weight_candidates = []
25
- for _, text, conf in results:
26
- cleaned = text.lower()
27
- cleaned = cleaned.replace("kg", "").replace("kgs", "")
28
- cleaned = cleaned.replace("o", "0").replace("O", "0")
29
- cleaned = cleaned.replace("s", "5").replace("S", "5")
30
- cleaned = cleaned.replace("g", "9").replace("G", "6")
31
- cleaned = re.sub(r"[^\d\.]", "", cleaned)
32
 
33
- if re.fullmatch(r"\d{2,4}(\.\d{1,2})?", cleaned):
34
- weight_candidates.append((cleaned, conf))
 
 
 
 
 
 
 
 
35
 
36
  if not weight_candidates:
37
- return "Not detected", 0.0, "\n".join(all_texts)
38
 
39
  best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
40
- return best_weight, round(best_conf * 100, 2), "\n".join(all_texts)
41
 
42
  except Exception as e:
43
  return f"Error: {str(e)}", 0.0, "OCR failed"
 
1
+ from paddleocr import PaddleOCR
2
  import numpy as np
 
3
  import re
4
 
5
+ ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
6
 
7
  def extract_weight_from_image(pil_img):
8
  try:
9
  img = np.array(pil_img)
10
+ result = ocr.ocr(img, cls=True)
11
 
12
+ all_text = []
 
 
 
 
 
 
 
 
 
 
 
13
  weight_candidates = []
 
 
 
 
 
 
 
14
 
15
+ for line in result:
16
+ for box, (text, confidence) in line:
17
+ all_text.append(text)
18
+ cleaned = text.lower()
19
+ cleaned = cleaned.replace("kg", "").replace("kgs", "")
20
+ cleaned = cleaned.replace("o", "0").replace("s", "5").replace("g", "9")
21
+ cleaned = re.sub(r"[^\d\.]", "", cleaned)
22
+
23
+ if re.fullmatch(r"\d{2,4}(\.\d{1,2})?", cleaned):
24
+ weight_candidates.append((cleaned, confidence))
25
 
26
  if not weight_candidates:
27
+ return "Not detected", 0.0, "\n".join(all_text)
28
 
29
  best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
30
+ return best_weight, round(best_conf * 100, 2), "\n".join(all_text)
31
 
32
  except Exception as e:
33
  return f"Error: {str(e)}", 0.0, "OCR failed"