Sanjayraju30 committed on
Commit
1cb8b90
·
verified ·
1 Parent(s): 65ef5f8

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +10 -9
ocr_engine.py CHANGED
@@ -4,19 +4,20 @@ import numpy as np
4
  from PIL import Image
5
 
6
  def extract_weight(pil_image: Image.Image) -> str:
7
- # Convert PIL to OpenCV image
8
  img = np.array(pil_image.convert("RGB"))
9
-
10
- # Convert to grayscale
11
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
12
 
13
- # Optional: Resize to improve accuracy
14
  gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
 
 
15
 
16
- # OCR config for digits only
17
- config = "--psm 7 digits"
18
- text = pytesseract.image_to_string(gray, config=config)
19
 
20
- # Keep digits and decimal
21
  weight = ''.join(filter(lambda x: x in '0123456789.', text))
22
- return weight if weight else "No valid weight detected"
 
 
4
  from PIL import Image
5
 
6
  def extract_weight(pil_image: Image.Image) -> str:
7
+ # Convert to OpenCV format
8
  img = np.array(pil_image.convert("RGB"))
 
 
9
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
10
 
11
+ # Enhance image for OCR
12
  gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
13
+ blurred = cv2.GaussianBlur(gray, (5, 5), 0)
14
+ _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
15
 
16
+ # OCR with config
17
+ config = "--psm 7 -c tessedit_char_whitelist=0123456789."
18
+ text = pytesseract.image_to_string(thresh, config=config)
19
 
20
+ # Extract digits and decimal
21
  weight = ''.join(filter(lambda x: x in '0123456789.', text))
22
+
23
+ return weight.strip() if weight else "No valid weight detected"