Sanjayraju30 committed on
Commit
18f53a5
·
verified ·
1 Parent(s): f901f58

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +15 -19
ocr_engine.py CHANGED
@@ -3,39 +3,35 @@ import numpy as np
3
  import cv2
4
  import re
5
 
 
6
  reader = easyocr.Reader(['en'], gpu=False)
7
 
8
  def extract_weight_from_image(pil_img):
9
  try:
 
10
  img = np.array(pil_img)
11
 
12
- # Resize and convert to grayscale
13
- img = cv2.resize(img, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_LINEAR)
14
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
 
 
 
15
 
16
- # Apply Gaussian blur to remove noise
17
- blurred = cv2.GaussianBlur(gray, (5, 5), 0)
18
-
19
- # Apply adaptive threshold
20
- thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
21
- cv2.THRESH_BINARY_INV, 15, 6)
22
-
23
- # OCR
24
- results = reader.readtext(thresh)
25
-
26
- # Debug: Print all detected text
27
- print("OCR Results:", results)
28
 
 
29
  weight_candidates = []
30
- for _, text, conf in results:
31
- text = text.lower().replace('kg', '').replace('kgs', '').strip()
32
- if re.match(r'^\d{2,4}(\.\d{1,2})?$', text):
33
- weight_candidates.append((text, conf))
34
 
35
  if not weight_candidates:
36
  return "Not detected", 0.0
37
 
38
- # Return the one with highest confidence
39
  weight, confidence = sorted(weight_candidates, key=lambda x: -x[1])[0]
40
  return weight, round(confidence * 100, 2)
41
 
 
3
  import cv2
4
  import re
5
 
6
+ # Load the OCR engine
7
  reader = easyocr.Reader(['en'], gpu=False)
8
 
9
  def extract_weight_from_image(pil_img):
10
  try:
11
+ # Convert PIL to OpenCV image (numpy array)
12
  img = np.array(pil_img)
13
 
14
+ # Step 1: Preprocess image for better OCR
15
+ img = cv2.resize(img, None, fx=3, fy=3, interpolation=cv2.INTER_LINEAR)
16
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
17
+ blur = cv2.GaussianBlur(gray, (3, 3), 0)
18
+ _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
19
+ thresh = cv2.bitwise_not(thresh) # Invert for dark digits
20
 
21
+ # Step 2: Run OCR
22
+ results = reader.readtext(thresh, detail=1)
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # Step 3: Extract numbers like 65.20 or 50
25
  weight_candidates = []
26
+ for bbox, text, conf in results:
27
+ clean = text.lower().replace("kg", "").replace("kgs", "").strip()
28
+ if re.fullmatch(r"\d{2,4}(\.\d{1,2})?", clean):
29
+ weight_candidates.append((clean, conf))
30
 
31
  if not weight_candidates:
32
  return "Not detected", 0.0
33
 
34
+ # Step 4: Choose highest confidence number
35
  weight, confidence = sorted(weight_candidates, key=lambda x: -x[1])[0]
36
  return weight, round(confidence * 100, 2)
37