Sanjayraju30 committed on
Commit
45d0a85
·
verified ·
1 Parent(s): 8daea2b

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +10 -24
ocr_engine.py CHANGED
@@ -5,35 +5,21 @@ from PIL import Image
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
- # Step 1: Convert PIL to OpenCV
9
- img = pil_img.convert("L") # grayscale
10
  img = np.array(img)
 
11
 
12
- # Step 2: Resize image for better OCR accuracy
13
- img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
 
14
 
15
- # Step 3: Apply Gaussian Blur to remove noise
16
- blur = cv2.GaussianBlur(img, (5, 5), 0)
17
-
18
- # Step 4: Apply Adaptive Thresholding
19
- thresh = cv2.adaptiveThreshold(
20
- blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
21
- cv2.THRESH_BINARY_INV, 11, 2
22
- )
23
-
24
- # Step 5: OCR Config - digits only
25
- config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
26
-
27
- # Step 6: Run OCR
28
- text = pytesseract.image_to_string(thresh, config=config)
29
- print("🔍 OCR RAW OUTPUT:", repr(text)) # view this in Hugging Face logs
30
-
31
- # Step 7: Extract numbers
32
  weight = ''.join(filter(lambda c: c in '0123456789.', text))
33
 
34
- confidence = 95 if weight else 0
35
  return weight.strip(), confidence
36
-
37
  except Exception as e:
38
- print("OCR Exception:", str(e))
39
  return "", 0
 
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
+ # Convert PIL image to OpenCV
9
+ img = pil_img.convert("RGB")
10
  img = np.array(img)
11
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
12
 
13
+ # Preprocess
14
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
15
+ blur = cv2.GaussianBlur(gray, (3, 3), 0)
16
 
17
+ # OCR
18
+ text = pytesseract.image_to_string(blur, config='--psm 7 digits')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  weight = ''.join(filter(lambda c: c in '0123456789.', text))
20
 
21
+ confidence = 95 # Replace with real confidence logic if needed
22
  return weight.strip(), confidence
 
23
  except Exception as e:
24
+ print(f"OCR error: {e}")
25
  return "", 0