Sanjayraju30 committed on
Commit
8fe1b94
·
verified ·
1 Parent(s): 701d11a

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +21 -10
ocr_engine.py CHANGED
@@ -1,30 +1,41 @@
1
  import pytesseract
2
  import numpy as np
3
- import cv2
4
  import re
 
 
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
 
8
  img = np.array(pil_img)
9
 
10
  # Convert to grayscale
11
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
12
 
13
- # Resize for better OCR
14
- gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
 
 
 
15
 
16
- # Thresholding
17
- _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
 
18
 
19
- # OCR using pytesseract
20
- text = pytesseract.image_to_string(thresh, config='--psm 6 digits')
21
- print("OCR Output:", text)
22
 
23
- # Regex to extract weight (like 54.20, 102.5, etc.)
24
- match = re.search(r"\b\d{2,4}\.?\d{0,2}\b", text)
 
 
 
 
25
  if match:
26
  return match.group(), 95.0
27
  else:
28
  return "No weight detected", 0.0
 
29
  except Exception as e:
30
  return f"Error: {str(e)}", 0.0
 
1
  import pytesseract
2
  import numpy as np
 
3
  import re
4
+ import cv2
5
+ from PIL import Image
6
 
7
  def extract_weight_from_image(pil_img):
8
  try:
9
+ # Convert PIL image to numpy array
10
  img = np.array(pil_img)
11
 
12
  # Convert to grayscale
13
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
14
 
15
+ # Resize image to improve OCR accuracy
16
+ resized = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
17
+
18
+ # Apply Gaussian blur
19
+ blurred = cv2.GaussianBlur(resized, (5, 5), 0)
20
 
21
+ # Apply adaptive thresholding for better contrast
22
+ thresh = cv2.adaptiveThreshold(
23
+ blurred, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2
24
+ )
25
 
26
+ # OCR config: use digit-only mode and whitelist characters
27
+ config = "--psm 7 -c tessedit_char_whitelist=0123456789."
 
28
 
29
+ # Extract text using pytesseract
30
+ ocr_text = pytesseract.image_to_string(thresh, config=config)
31
+ print("OCR Text:", ocr_text)
32
+
33
+ # Use regex to find weight values (e.g., 52.35, 002.50 etc.)
34
+ match = re.search(r"\b\d{2,4}\.?\d{0,2}\b", ocr_text)
35
  if match:
36
  return match.group(), 95.0
37
  else:
38
  return "No weight detected", 0.0
39
+
40
  except Exception as e:
41
  return f"Error: {str(e)}", 0.0