Sanjayraju30 committed on
Commit
65ddb11
·
verified ·
1 Parent(s): 81b527b

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +5 -10
ocr_engine.py CHANGED
@@ -1,21 +1,16 @@
1
- import cv2
2
- import pytesseract
3
  from PIL import Image
 
4
  import re
5
 
6
  def extract_weight(img_path):
7
- img = cv2.imread(img_path)
8
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
9
-
10
- # OCR
11
- text = pytesseract.image_to_string(gray, config='--psm 6')
12
 
13
- # Clean and lower text
 
14
  text = text.lower().replace('\n', ' ').strip()
15
 
16
- # Regex to find pattern like 52.25 g or 75.8 kg
17
  match = re.search(r'(\d+\.\d+|\d+)\s*(kg|g)', text)
18
-
19
  if match:
20
  number = match.group(1)
21
  unit = match.group(2)
 
 
 
1
  from PIL import Image
2
+ import pytesseract
3
  import re
4
 
5
  def extract_weight(img_path):
6
+ img = Image.open(img_path).convert("L") # Grayscale
 
 
 
 
7
 
8
+ # OCR
9
+ text = pytesseract.image_to_string(img, config='--psm 6')
10
  text = text.lower().replace('\n', ' ').strip()
11
 
12
+ # Find weight + unit (e.g., 52.25 g, 75.8 kg)
13
  match = re.search(r'(\d+\.\d+|\d+)\s*(kg|g)', text)
 
14
  if match:
15
  number = match.group(1)
16
  unit = match.group(2)