Sanjayraju30 committed on
Commit
e4046d9
·
verified ·
1 Parent(s): d1b5503

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +21 -15
ocr_engine.py CHANGED
@@ -5,30 +5,36 @@ from PIL import Image
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
- # Convert PIL to OpenCV
9
  img = pil_img.convert("RGB")
10
- img = np.array(img)
11
- img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
12
 
13
  # Convert to grayscale
14
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
15
 
16
- # Thresholding to highlight digits
17
- _, binary = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY_INV)
 
 
18
 
19
- # Resize for better OCR
20
- resized = cv2.resize(binary, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
21
 
22
- # Run OCR with digit whitelist
23
- config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
24
- raw_text = pytesseract.image_to_string(resized, config=config)
25
 
26
- print("πŸ” OCR Raw Output:", repr(raw_text)) # Show in Hugging Face logs
 
 
 
 
 
 
 
27
 
28
- # Filter for digits only
29
- weight = ''.join(filter(lambda c: c in '0123456789.', raw_text))
30
  confidence = 95 if weight else 0
31
- return weight.strip(), confidence
32
 
33
  except Exception as e:
34
  print("❌ OCR Error:", str(e))
 
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
+ # Convert PIL image to OpenCV format
9
  img = pil_img.convert("RGB")
10
+ img_np = np.array(img)
11
+ img_cv = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
12
 
13
  # Convert to grayscale
14
+ gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
15
 
16
+ # Adaptive Thresholding for 7-segment LCD
17
+ processed = cv2.adaptiveThreshold(
18
+ gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 10
19
+ )
20
 
21
+ # Resize to enhance small text
22
+ resized = cv2.resize(processed, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
23
 
24
+ # OCR config tuned for digit blocks
25
+ config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
 
26
 
27
+ # Run OCR
28
+ text = pytesseract.image_to_string(resized, config=config)
29
+
30
+ print("πŸ” RAW OCR OUTPUT:", repr(text))
31
+
32
+ # Clean the text
33
+ weight = ''.join(c for c in text if c in '0123456789.')
34
+ weight = weight.strip()
35
 
 
 
36
  confidence = 95 if weight else 0
37
+ return weight, confidence
38
 
39
  except Exception as e:
40
  print("❌ OCR Error:", str(e))