Sanjayraju30 committed on
Commit
4a07e0e
·
verified ·
1 Parent(s): 220f8e5

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +14 -13
ocr_engine.py CHANGED
@@ -1,27 +1,28 @@
1
- import easyocr
2
  import numpy as np
3
  import re
4
  import cv2
5
-
6
- reader = easyocr.Reader(['en'], gpu=False)
7
 
8
  def extract_weight_from_image(pil_img):
9
  try:
10
  img = np.array(pil_img)
11
 
12
- # Preprocessing
13
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
14
- resized = cv2.resize(gray, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
15
- blurred = cv2.GaussianBlur(resized, (3, 3), 0)
16
- _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
17
 
18
- # OCR
19
- result = reader.readtext(thresh, detail=0)
20
- combined_text = " ".join(result)
21
- print("OCR Result:", combined_text)
 
 
 
 
 
 
22
 
23
- # Improve regex to only match numbers with optional decimal
24
- match = re.search(r"\b(?:\d{1,3}\.?\d{1,2}|\d{1,4})\b", combined_text)
25
  if match:
26
  return match.group(), 95.0
27
  else:
 
 
1
  import numpy as np
2
  import re
3
  import cv2
4
+ import pytesseract
 
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
  img = np.array(pil_img)
9
 
10
+ # Convert to grayscale
11
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
 
 
 
12
 
13
+ # Resize (sharpens small digits)
14
+ gray = cv2.resize(gray, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
15
+
16
+ # Thresholding to clean up image
17
+ _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
18
+
19
+ # Run Tesseract OCR
20
+ custom_config = r'--oem 3 --psm 6'
21
+ text = pytesseract.image_to_string(thresh, config=custom_config)
22
+ print("OCR Text:", text)
23
 
24
+ # Extract weight pattern like 25.50 or 150
25
+ match = re.search(r"\b\d{1,4}\.?\d{0,2}\b", text)
26
  if match:
27
  return match.group(), 95.0
28
  else: