Sanjayraju30 committed on
Commit
6b14fa5
·
verified ·
1 Parent(s): 1b98bd7

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +12 -18
ocr_engine.py CHANGED
@@ -1,37 +1,31 @@
1
- import pytesseract
2
  import numpy as np
3
  import re
4
  import cv2
5
- from PIL import Image
 
6
 
7
  def extract_weight_from_image(pil_img):
8
  try:
9
- # Convert PIL image to numpy array
10
  img = np.array(pil_img)
11
 
12
  # Convert to grayscale
13
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
14
 
15
- # Resize image to improve OCR accuracy
16
  resized = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
17
 
18
- # Apply Gaussian blur
19
  blurred = cv2.GaussianBlur(resized, (5, 5), 0)
 
20
 
21
- # Apply adaptive thresholding for better contrast
22
- thresh = cv2.adaptiveThreshold(
23
- blurred, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2
24
- )
25
-
26
- # OCR config: use digit-only mode and whitelist characters
27
- config = "--psm 7 -c tessedit_char_whitelist=0123456789."
28
-
29
- # Extract text using pytesseract
30
- ocr_text = pytesseract.image_to_string(thresh, config=config)
31
- print("OCR Text:", ocr_text)
32
 
33
- # Use regex to find weight values (e.g., 52.35, 002.50 etc.)
34
- match = re.search(r"\b\d{2,4}\.?\d{0,2}\b", ocr_text)
35
  if match:
36
  return match.group(), 95.0
37
  else:
 
1
+ import easyocr
2
  import numpy as np
3
  import re
4
  import cv2
5
+
6
+ reader = easyocr.Reader(['en'], gpu=False)
7
 
8
  def extract_weight_from_image(pil_img):
9
  try:
 
10
  img = np.array(pil_img)
11
 
12
  # Convert to grayscale
13
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
14
 
15
+ # Resize to improve OCR accuracy
16
  resized = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
17
 
18
+ # Blur and threshold
19
  blurred = cv2.GaussianBlur(resized, (5, 5), 0)
20
+ _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
21
 
22
+ # OCR
23
+ result = reader.readtext(thresh, detail=0)
24
+ text = " ".join(result)
25
+ print("OCR Text:", text)
 
 
 
 
 
 
 
26
 
27
+ # Regex to find weight like 002.50 or 55.3
28
+ match = re.search(r"\b\d{2,4}\.?\d{0,2}\b", text)
29
  if match:
30
  return match.group(), 95.0
31
  else: