Sanjayraju30 committed on
Commit
7e1096c
·
verified ·
1 Parent(s): c316ca4

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +36 -16
ocr_engine.py CHANGED
@@ -1,21 +1,41 @@
1
- import easyocr
2
- import re
 
 
3
 
4
- reader = easyocr.Reader(['en']) # Load once
 
 
 
 
 
5
 
6
def extract_weight_from_image(pil_image):
    """Return the first gram reading found in the OCR results for an image.

    The image is handed to the module-level ``reader`` and each recognised
    text fragment is lower-cased and searched for a number (optionally with a
    decimal part) followed by an optional single whitespace character and
    the letter ``g``.

    Args:
        pil_image: The image passed straight through to ``reader.readtext``.
            NOTE(review): confirm that the reader accepts this object type.

    Returns:
        ``(weight, confidence)`` where ``weight`` is the matched number with
        ``" g"`` appended and ``confidence`` is the score reported for that
        fragment. If no fragment matches, ``("No weight detected", 0.0)``.
    """
    gram_pattern = r'(\d+(\.\d+)?)\s?g'

    for _box, raw_text, score in reader.readtext(pil_image):
        hit = re.search(gram_pattern, raw_text.lower())
        if hit is None:
            continue
        # Only the first matching fragment is reported.
        return hit.group(1) + " g", score

    return "No weight detected", 0.0
 
 
 
1
+ import cv2
2
+ import pytesseract
3
+ import numpy as np
4
+ from PIL import Image
5
 
6
def extract_weight_from_image(pil_img):
    """Read the number shown in an image of a digital display.

    The image is converted to grayscale, binarised with an inverted adaptive
    mean threshold, upscaled 2x and passed to Tesseract restricted to digits
    and the decimal point.

    Args:
        pil_img: An image object providing ``convert("RGB")`` (a PIL image).

    Returns:
        ``(weight, confidence)``. ``weight`` is a string made of digits and
        at most one ``.``. ``confidence`` is the fixed value 95 whenever a
        reading is accepted; it is a placeholder, not a score measured by
        the OCR engine. ``("", 0)`` is returned when nothing usable was
        recognised, when the recognised characters do not form a plausible
        number, or when any step raises.
    """
    try:
        rgb = np.array(pil_img.convert("RGB"))

        # One-step RGB -> gray; equivalent to the RGB -> BGR -> gray round
        # trip without the intermediate copy.
        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

        # Adaptive thresholding for 7-segment LCD digits (block size 15,
        # offset 10); inverted so dark segments become white foreground.
        processed = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 10
        )

        # Enlarge to help with small text.
        resized = cv2.resize(
            processed, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR
        )

        # --psm 7 treats the image as a single text line; the whitelist keeps
        # the output to digits and the decimal point.
        config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
        text = pytesseract.image_to_string(resized, config=config)

        print("🔍 RAW OCR OUTPUT:", repr(text))

        # Drop whitespace/newlines and anything else outside the whitelist.
        weight = ''.join(c for c in text if c in '0123456789.')

        # Reject OCR noise that cannot be a number: no digit at all (".",
        # "..") or more than one decimal point ("1.2.3"). Previously such
        # strings were returned as a reading with confidence 95.
        has_digit = any(c.isdigit() for c in weight)
        if not has_digit or weight.count('.') > 1:
            return "", 0

        return weight, 95

    except Exception as e:
        # Best-effort boundary: report the failure and signal "no reading".
        print("❌ OCR Error:", str(e))
        return "", 0