Sanjayraju30 committed on
Commit
5d670ae
·
verified ·
1 Parent(s): 9073b0e

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +24 -15
ocr_engine.py CHANGED
@@ -1,33 +1,42 @@
1
- from paddleocr import PaddleOCR
2
  import numpy as np
 
3
  import re
4
 
5
- ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
6
 
7
  def extract_weight_from_image(pil_img):
8
  try:
9
  img = np.array(pil_img)
10
- result = ocr.ocr(img, cls=True)
11
 
12
- all_text = []
 
 
 
 
 
 
 
 
13
  weight_candidates = []
14
 
15
- for line in result:
16
- for box, (text, confidence) in line:
17
- all_text.append(text)
18
- cleaned = text.lower()
19
- cleaned = cleaned.replace("kg", "").replace("kgs", "")
20
- cleaned = cleaned.replace("o", "0").replace("s", "5").replace("g", "9")
21
- cleaned = re.sub(r"[^\d\.]", "", cleaned)
 
22
 
23
- if re.fullmatch(r"\d{2,4}(\.\d{1,2})?", cleaned):
24
- weight_candidates.append((cleaned, confidence))
25
 
26
  if not weight_candidates:
27
- return "Not detected", 0.0, "\n".join(all_text)
28
 
29
  best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
30
- return best_weight, round(best_conf * 100, 2), "\n".join(all_text)
31
 
32
  except Exception as e:
33
  return f"Error: {str(e)}", 0.0, "OCR failed"
 
1
+ import easyocr
2
  import numpy as np
3
+ import cv2
4
  import re
5
 
6
+ reader = easyocr.Reader(['en'], gpu=False)
7
 
8
  def extract_weight_from_image(pil_img):
9
  try:
10
  img = np.array(pil_img)
 
11
 
12
+ # Resize and grayscale
13
+ img = cv2.resize(img, None, fx=4, fy=4, interpolation=cv2.INTER_LINEAR)
14
+ gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
15
+ gray = cv2.bilateralFilter(gray, 11, 17, 17)
16
+ _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
17
+
18
+ results = reader.readtext(thresh)
19
+
20
+ ocr_raw_texts = []
21
  weight_candidates = []
22
 
23
+ for _, text, conf in results:
24
+ ocr_raw_texts.append(text)
25
+ t = text.lower()
26
+ t = t.replace("kg", "").replace("kgs", "")
27
+ t = t.replace("o", "0").replace("O", "0")
28
+ t = t.replace("s", "5").replace("S", "5")
29
+ t = t.replace("g", "9").replace("G", "6")
30
+ t = re.sub(r"[^\d\.]", "", t)
31
 
32
+ if re.fullmatch(r"\d{2,4}(\.\d{1,2})?", t):
33
+ weight_candidates.append((t, conf))
34
 
35
  if not weight_candidates:
36
+ return "Not detected", 0.0, "\n".join(ocr_raw_texts)
37
 
38
  best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
39
+ return best_weight, round(best_conf * 100, 2), "\n".join(ocr_raw_texts)
40
 
41
  except Exception as e:
42
  return f"Error: {str(e)}", 0.0, "OCR failed"