Sanjayraju30 committed on
Commit
c11fb3a
·
verified ·
1 Parent(s): 45d0a85

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +42 -16
ocr_engine.py CHANGED
@@ -1,25 +1,51 @@
 
 
1
  import cv2
2
- import pytesseract
3
  import numpy as np
4
  from PIL import Image
5
 
6
def extract_weight_from_image(pil_img):
    """Read a numeric weight from an image using Tesseract OCR.

    The image is converted to grayscale, lightly blurred, and passed to
    ``pytesseract.image_to_string``. Only the characters ``0-9`` and ``.``
    of the OCR output are kept.

    Args:
        pil_img: Image object exposing ``convert("RGB")`` (a PIL image).

    Returns:
        A ``(weight, confidence)`` tuple. ``weight`` is the filtered digit
        string. ``confidence`` is the fixed placeholder value 95 when at
        least one digit was read. ``("", 0)`` is returned when no digit was
        recognized or when any step raised an exception.
    """
    try:
        # Convert PIL image to an OpenCV-style BGR array.
        img = cv2.cvtColor(np.array(pil_img.convert("RGB")), cv2.COLOR_RGB2BGR)

        # Preprocess: grayscale followed by a small Gaussian blur.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (3, 3), 0)

        # OCR
        text = pytesseract.image_to_string(blur, config='--psm 7 digits')
        weight = ''.join(ch for ch in text if ch in '0123456789.')

        # Nothing numeric was read (empty output or stray dots only): report
        # it like a failure instead of attaching the placeholder confidence
        # to a non-reading.
        if not any(ch.isdigit() for ch in weight):
            return "", 0

        confidence = 95  # Placeholder; replace with real confidence logic if needed
        return weight, confidence
    except Exception as e:
        # Best-effort API: callers get an empty reading rather than a raise.
        print(f"OCR error: {e}")
        return "", 0
 
1
+ import easyocr
2
+ import re
3
  import cv2
 
4
  import numpy as np
5
  from PIL import Image
6
 
7
+ # Initialize EasyOCR reader (only once)
8
+ reader = easyocr.Reader(['en'], gpu=False)
9
+
10
def preprocess_image(image):
    """Turn an RGB image array into a thickened binary mask for OCR.

    Steps: RGB -> grayscale, inverted adaptive mean thresholding, then one
    pass of dilation with a 2x2 kernel.

    Args:
        image: Image array in RGB channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_RGB2GRAY``).

    Returns:
        The dilated, thresholded single-channel image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Adaptive thresholding (the original author notes it works well for
    # 7-segment displays). Positional args 15 and 10 are the neighbourhood
    # block size and the constant subtracted from the local mean.
    binary = cv2.adaptiveThreshold(
        grayscale,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV,
        15,
        10,
    )

    # A single dilation pass strengthens the strokes of the digits.
    return cv2.dilate(binary, np.ones((2, 2), np.uint8), iterations=1)
23
+
24
# Weight-like number: a 2-4 digit integer part with an optional 1-2 digit
# fraction. The lookbehinds stop a match from starting inside a longer digit
# run or inside the fractional part of another number (e.g. the "75" in
# "0.75"); the lookahead rejects integer parts longer than four digits
# instead of silently truncating them.
_WEIGHT_PATTERN = re.compile(r"(?<!\d)(?<!\d\.)\d{2,4}(?!\d)(?:\.\d{1,2})?")


def extract_weight_from_image(pil_image):
    """Read a weight value (e.g. ``75.5``) from an image using EasyOCR.

    The image is converted to an RGB array, run through
    ``preprocess_image``, and passed to the module-level ``reader``. The
    first detection whose text contains a weight-like number wins.

    Args:
        pil_image: Image object exposing ``convert("RGB")`` (a PIL image).

    Returns:
        A ``(weight, confidence)`` tuple:

        * on success, the matched number as a string and the detection's
          confidence multiplied by 100 and rounded to two decimals;
        * ``("No weight detected", 0.0)`` when no detection contains a
          weight-like number;
        * ``("Error: <message>", 0.0)`` when any step raises.
    """
    try:
        # Convert PIL to an array the OpenCV preprocessing can consume.
        image = np.array(pil_image.convert("RGB"))
        processed = preprocess_image(image)

        # OCR
        detections = reader.readtext(processed)

        # Each detection is indexed as [1] = text, [2] = confidence.
        for detection in detections:
            text, conf = detection[1], detection[2]
            match = _WEIGHT_PATTERN.search(text)
            if match:
                return match.group(), round(conf * 100, 2)

        return "No weight detected", 0.0
    except Exception as e:
        # Best-effort API: failures are reported in-band, never raised.
        return f"Error: {str(e)}", 0.0