Sanjayraju30 committed on
Commit
7605648
·
verified ·
1 Parent(s): 2c62dab

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +23 -29
ocr_engine.py CHANGED
@@ -5,41 +5,35 @@ from PIL import Image
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
- # Convert PIL image to OpenCV format
9
- img = pil_img.convert("RGB")
10
- img_np = np.array(img)
11
- img_cv = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
12
- print("🧼 Image converted to OpenCV format.")
13
-
14
- # Convert to grayscale
15
- gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
16
-
17
- # Adaptive Thresholding for 7-segment LCD
18
- processed = cv2.adaptiveThreshold(
19
- gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 10
20
- )
21
 
22
- # Resize to enhance small text
23
- resized = cv2.resize(processed, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
24
 
25
- # Optional: Apply dilation to enhance contours
26
- kernel = np.ones((2, 2), np.uint8)
27
- dilated = cv2.dilate(resized, kernel, iterations=1)
 
 
28
 
29
- # OCR config tuned for digits
30
- config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
31
 
32
- # Run OCR
33
- text = pytesseract.image_to_string(dilated, config=config)
34
- print("🔍 RAW OCR OUTPUT:", repr(text))
35
 
36
- # Clean the text
37
- weight = ''.join(c for c in text if c in '0123456789.')
38
- weight = weight.strip()
39
 
40
  confidence = 95 if weight else 0
41
- return weight, confidence
42
 
43
  except Exception as e:
44
- print("❌ OCR Error:", str(e))
45
- return "", 0
 
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
+ # Step 1: Convert PIL to OpenCV
9
+ img = pil_img.convert("L") # grayscale
10
+ img = np.array(img)
11
+
12
+ # Step 2: Resize image for better OCR accuracy
13
+ img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
 
 
 
 
 
 
 
14
 
15
+ # Step 3: Apply Gaussian Blur to remove noise
16
+ blur = cv2.GaussianBlur(img, (5, 5), 0)
17
 
18
+ # Step 4: Apply Adaptive Thresholding
19
+ thresh = cv2.adaptiveThreshold(
20
+ blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
21
+ cv2.THRESH_BINARY_INV, 11, 2
22
+ )
23
 
24
+ # Step 5: OCR Config - digits only
25
+ config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
26
 
27
+ # Step 6: Run OCR
28
+ text = pytesseract.image_to_string(thresh, config=config)
29
+ print("🔍 OCR RAW OUTPUT:", repr(text)) # view this in Hugging Face logs
30
 
31
+ # Step 7: Extract numbers
32
+ weight = ''.join(filter(lambda c: c in '0123456789.', text))
 
33
 
34
  confidence = 95 if weight else 0
35
+ return weight.strip(), confidence
36
 
37
  except Exception as e:
38
+ print("❌ OCR Exception:", str(e))
39
+ return "", 0