Sanjayraju30 committed on
Commit
65ef5f8
·
verified ·
1 Parent(s): f32159a

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +15 -4
ocr_engine.py CHANGED
@@ -1,11 +1,22 @@
1
  import cv2
2
  import pytesseract
3
- from PIL import Image
4
  import numpy as np
 
 
 
 
 
5
 
6
def extract_weight(image: Image.Image) -> str:
    """Read the numeric weight shown in an image using Tesseract OCR.

    The image is converted to grayscale and passed to Tesseract with the
    ``--psm 7 digits`` configuration. Only the characters ``0-9`` and ``.``
    are kept from the OCR output.

    Args:
        image: Source image as a PIL ``Image``.

    Returns:
        The kept characters joined in their original order, or
        ``"No valid weight detected"`` when the OCR output contains no digit.
    """
    # OpenCV works on NumPy arrays; forcing RGB guarantees the colour
    # conversion below receives a three-channel image.
    img = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    text = pytesseract.image_to_string(gray, config="--psm 7 digits")

    weight = "".join(ch for ch in text if ch in "0123456789.")

    # A result made only of dots (OCR noise such as "." or "..") is not a
    # weight; require at least one digit before reporting success.
    if not any(ch.isdigit() for ch in weight):
        return "No valid weight detected"
    return weight
 
1
  import cv2
2
  import pytesseract
 
3
  import numpy as np
4
+ from PIL import Image
5
+
6
def extract_weight(pil_image: Image.Image) -> str:
    """Read the numeric weight shown in an image using Tesseract OCR.

    The image is converted to grayscale, upscaled 2x, and passed to
    Tesseract with the ``--psm 7 digits`` configuration. Only the
    characters ``0-9`` and ``.`` are kept from the OCR output.

    Args:
        pil_image: Source image as a PIL ``Image``.

    Returns:
        The kept characters joined in their original order, or
        ``"No valid weight detected"`` when the OCR output contains no digit.
    """
    # OpenCV works on NumPy arrays; forcing RGB guarantees the colour
    # conversion below receives a three-channel image.
    img = np.array(pil_image.convert("RGB"))
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Upscale 2x before OCR; intended to improve recognition accuracy.
    gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)

    # OCR configuration restricted to digits.
    config = "--psm 7 digits"
    text = pytesseract.image_to_string(gray, config=config)

    # Keep digits and the decimal point, dropping whitespace and other noise.
    weight = "".join(ch for ch in text if ch in "0123456789.")

    # A result made only of dots (OCR noise such as "." or "..") is not a
    # weight; require at least one digit before reporting success.
    if not any(ch.isdigit() for ch in weight):
        return "No valid weight detected"
    return weight