Sanjayraju30 committed on
Commit
9661246
·
verified ·
1 Parent(s): cdb355d

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +25 -15
ocr_engine.py CHANGED
@@ -1,40 +1,50 @@
1
- import pytesseract
2
  import numpy as np
3
  import cv2
4
  import re
5
- from PIL import Image
 
6
 
7
  def extract_weight_from_image(pil_img):
8
  try:
9
  img = np.array(pil_img)
10
 
 
 
 
 
11
  # Convert to grayscale
12
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
13
 
14
- # Resize and enhance
15
  gray = cv2.resize(gray, None, fx=4, fy=4, interpolation=cv2.INTER_LINEAR)
16
- gray = cv2.GaussianBlur(gray, (3, 3), 0)
 
17
  gray = cv2.equalizeHist(gray)
 
18
 
19
- # Threshold
20
- _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
21
 
22
- # Invert if dark background
23
- if np.mean(thresh > 127) < 0.5:
 
24
  thresh = cv2.bitwise_not(thresh)
25
 
26
- # OCR using Tesseract
27
- config = "--psm 6 -c tessedit_char_whitelist=0123456789."
28
- text = pytesseract.image_to_string(thresh, config=config)
29
- print("🔍 OCR Text:", text)
30
 
31
- # Extract weight
32
- match = re.search(r"\d{1,3}(?:\.\d{1,2})?", text)
33
  if match:
34
- weight = match.group()
35
  return f"{weight} kg", 100.0
36
  else:
37
  return "No weight detected kg", 0.0
38
 
39
  except Exception as e:
 
40
  return f"Error: {str(e)}", 0.0
 
1
+ import easyocr
2
  import numpy as np
3
  import cv2
4
  import re
5
+
6
+ reader = easyocr.Reader(['en'], gpu=False)
7
 
8
  def extract_weight_from_image(pil_img):
9
  try:
10
  img = np.array(pil_img)
11
 
12
+ # Resize image for consistency
13
+ if img.shape[1] > 1000:
14
+ img = cv2.resize(img, (1000, int(img.shape[0] * 1000 / img.shape[1])))
15
+
16
  # Convert to grayscale
17
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
18
 
19
+ # Upscale image
20
  gray = cv2.resize(gray, None, fx=4, fy=4, interpolation=cv2.INTER_LINEAR)
21
+
22
+ # Histogram Equalization and slight blur
23
  gray = cv2.equalizeHist(gray)
24
+ blurred = cv2.GaussianBlur(gray, (3, 3), 0)
25
 
26
+ # Adaptive threshold
27
+ thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
28
+ cv2.THRESH_BINARY, 11, 2)
29
 
30
+ # Invert if needed
31
+ white_ratio = np.mean(thresh > 127)
32
+ if white_ratio < 0.5:
33
  thresh = cv2.bitwise_not(thresh)
34
 
35
+ # OCR
36
+ result = reader.readtext(thresh, detail=0)
37
+ print("🧠 OCR Raw Output:", result)
38
+ combined_text = " ".join(result).strip()
39
 
40
+ # Extract number
41
+ match = re.search(r"(\d{1,4}(?:\.\d{1,2})?)", combined_text)
42
  if match:
43
+ weight = match.group(1)
44
  return f"{weight} kg", 100.0
45
  else:
46
  return "No weight detected kg", 0.0
47
 
48
  except Exception as e:
49
+ print("❌ OCR Error:", e)
50
  return f"Error: {str(e)}", 0.0