Sanjayraju30 committed on
Commit
f47b893
·
verified ·
1 Parent(s): 1279f5b

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +30 -21
ocr_engine.py CHANGED
@@ -1,25 +1,34 @@
1
- import cv2
2
- import pytesseract
3
- import numpy as np
4
- from PIL import Image
 
 
 
5
 
6
def extract_weight_from_image(pil_img):
    """Read a numeric weight from an image using Tesseract.

    The image is converted to a blurred grayscale OpenCV array and passed to
    Tesseract in single-line digits mode (``--psm 7 digits``). Every digit
    and ``.`` character in the OCR output is kept, in order, so digits that
    the OCR separated with spaces are joined into one reading.

    Args:
        pil_img: Object exposing ``convert("RGB")`` whose result can be
            turned into a NumPy array (presumably a ``PIL.Image.Image`` --
            confirm against the caller).

    Returns:
        A ``(weight, confidence)`` tuple. ``weight`` is the reading as a
        string and ``confidence`` is the fixed placeholder ``95``.
        ``("", 0)`` is returned when OCR raises or when the output contains
        no digit at all.
    """
    try:
        # Convert the PIL image to an OpenCV BGR array.
        rgb = np.array(pil_img.convert("RGB"))
        bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

        # Preprocess: grayscale plus a light blur before OCR.
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (3, 3), 0)

        # OCR
        text = pytesseract.image_to_string(blur, config='--psm 7 digits')
    except Exception as e:
        # Deliberate best-effort boundary: report the failure, return empty.
        print(f"OCR error: {e}")
        return "", 0

    # Keep only digits and dots; drop dangling dots such as "12." or ".".
    weight = ''.join(c for c in text if c in '0123456789.').rstrip('.')

    # A result without any digit is not a reading, so do not attach the
    # placeholder confidence to it.
    if not any(c.isdigit() for c in weight):
        return "", 0

    confidence = 95  # Placeholder; replace with real confidence logic if needed
    return weight, confidence
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard-library import kept outside the guarded block so that `re` is
# always bound, even when the PaddleOCR import below fails.
import re

try:
    from paddleocr import PaddleOCR

    # Shared engine instance, created once when the module is imported
    # (angle classification enabled, English language).
    ocr = PaddleOCR(use_angle_cls=True, lang='en')
except Exception as e:
    # Best-effort load: any failure while importing or constructing the
    # engine is reported and leaves `ocr` as None, which
    # extract_weight_from_image checks before use.
    print(f"❌ PaddleOCR failed to load: {e}")
    ocr = None
8
 
9
def extract_weight_from_image(image):
    """Run OCR on ``image`` and return the first weight-like reading.

    The recognised text lines are scanned in the order the engine returned
    them. Each line is lower-cased and searched for a number optionally
    followed by ``kg`` or ``g``; the first line containing a number wins.

    Args:
        image: Passed unchanged to ``ocr.ocr(image, cls=True)``; the accepted
            input types are whatever the module-level engine supports.

    Returns:
        A ``(text, confidence)`` tuple:

        * ``("<number> <unit>", confidence)`` for the first matching line,
          where ``unit`` is ``"kg"`` or ``"g"`` (``"g"`` when the text has
          no unit) and ``confidence`` is the engine's score for that line.
        * ``("OCR not initialized", 0.0)`` when the module-level ``ocr``
          engine is ``None``.
        * ``("No weight detected", 0.0)`` when the engine returns nothing or
          no line contains a number. In the latter case the recognised lines
          are printed for debugging.

    Exceptions raised by the engine itself are not caught here.
    """
    # Function-scope import: keeps this function usable even if the
    # module-level import block did not bind `re`.
    import re

    if ocr is None:
        return ("OCR not initialized", 0.0)

    result = ocr.ocr(image, cls=True)
    if not result or not result[0]:
        return ("No weight detected", 0.0)

    debug_texts = []
    for line in result[0]:
        # The second element of each entry holds (text, confidence).
        text, confidence = line[1][0], line[1][1]
        debug_texts.append(f"{text} (Conf: {confidence:.2f})")

        # Number with optional kg/g. Only a well-formed number is captured;
        # a stray dot after it (e.g. "12. kg") is consumed outside the group
        # so it neither ends up in the result nor hides the unit.
        match = re.search(r'(\d+(?:\.\d+)?)\.?\s*(kg|g)?', text.lower())
        if match:
            weight = match.group(1)
            unit = match.group(2) or "g"
            return (f"{weight} {unit}", confidence)

    # Nothing matched: dump what the engine saw to help diagnose the miss.
    print("🧪 OCR DEBUG:")
    for t in debug_texts:
        print(t)

    return ("No weight detected", 0.0)