Sanjayraju30 committed on
Commit
701d11a
·
verified ·
1 Parent(s): 6c9a667

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +10 -8
ocr_engine.py CHANGED
@@ -7,19 +7,21 @@ def extract_weight_from_image(pil_img):
7
  try:
8
  img = np.array(pil_img)
9
 
10
- # Preprocessing
11
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
12
- resized = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
13
- blur = cv2.GaussianBlur(resized, (5, 5), 0)
14
- _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
 
 
15
 
16
  # OCR using pytesseract
17
- config = "--psm 6" # Assume a single uniform block of text
18
- text = pytesseract.image_to_string(thresh, config=config)
19
  print("OCR Output:", text)
20
 
21
- # Regex to find weight-like numbers
22
- match = re.search(r"\b\d{1,4}\.?\d{0,2}\b", text)
23
  if match:
24
  return match.group(), 95.0
25
  else:
 
7
  try:
8
  img = np.array(pil_img)
9
 
10
+ # Convert to grayscale
11
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
12
+
13
+ # Resize for better OCR
14
+ gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
15
+
16
+ # Thresholding
17
+ _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
18
 
19
  # OCR using pytesseract
20
+ text = pytesseract.image_to_string(thresh, config='--psm 6 digits')
 
21
  print("OCR Output:", text)
22
 
23
+ # Regex to extract weight (like 54.20, 102.5, etc.)
24
+ match = re.search(r"\b\d{2,4}\.?\d{0,2}\b", text)
25
  if match:
26
  return match.group(), 95.0
27
  else: