Sanjayraju30 committed on
Commit
0c37258
·
verified ·
1 Parent(s): 1362c73

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +15 -13
ocr_engine.py CHANGED
@@ -5,29 +5,31 @@ from PIL import Image
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
- # Convert to OpenCV image
9
  img = pil_img.convert("RGB")
10
  img = np.array(img)
11
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
12
 
13
- # Crop or resize if needed (optional based on display layout)
14
-
15
- # Convert to grayscale and enhance contrast
16
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
17
- _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
18
 
19
- # Optional: Resize for better OCR (helps with small digits)
20
- resized = cv2.resize(thresh, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
 
 
 
 
 
 
 
21
 
22
- # Apply OCR with proper config for digits
23
- custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
24
- text = pytesseract.image_to_string(resized, config=custom_config)
25
 
26
- # Filter out non-numeric parts
27
- weight = ''.join(filter(lambda c: c in '0123456789.', text))
28
  confidence = 95 if weight else 0
29
  return weight.strip(), confidence
30
 
31
  except Exception as e:
32
- print("OCR Exception:", str(e))
33
  return "", 0
 
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
+ # Convert PIL to OpenCV
9
  img = pil_img.convert("RGB")
10
  img = np.array(img)
11
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
12
 
13
+ # Convert to grayscale
 
 
14
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
15
 
16
+ # Thresholding to highlight digits
17
+ _, binary = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY_INV)
18
+
19
+ # Resize for better OCR
20
+ resized = cv2.resize(binary, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
21
+
22
+ # Run OCR with digit whitelist
23
+ config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
24
+ raw_text = pytesseract.image_to_string(resized, config=config)
25
 
26
+ print("🔍 OCR Raw Output:", repr(raw_text)) # Show in Hugging Face logs
 
 
27
 
28
+ # Filter for digits only
29
+ weight = ''.join(filter(lambda c: c in '0123456789.', raw_text))
30
  confidence = 95 if weight else 0
31
  return weight.strip(), confidence
32
 
33
  except Exception as e:
34
+ print("OCR Error:", str(e))
35
  return "", 0