Sanjayraju30 committed on
Commit
81b527b
·
verified ·
1 Parent(s): 17d218a

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +19 -20
ocr_engine.py CHANGED
@@ -1,25 +1,24 @@
1
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 
2
  from PIL import Image
3
- import torch
4
  import re
5
 
6
- # Load processor and model once
7
- processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
8
- model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
 
 
 
9
 
10
- def extract_weight(image):
11
- try:
12
- # OCR inference
13
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
14
- generated_ids = model.generate(pixel_values)
15
- text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
16
- print("OCR Output:", text)
17
 
18
- # Extract valid float or integer from OCR result
19
- match = re.search(r'\d{2,5}(\.\d{1,2})?', text) # e.g., 65 or 5325.0
20
- if match:
21
- return match.group() + " kg"
22
- else:
23
- return "No valid weight found"
24
- except Exception as e:
25
- return f"Error: {str(e)}"
 
 
1
+ import cv2
2
+ import pytesseract
3
  from PIL import Image
 
4
  import re
5
 
6
def extract_weight(img_path):
    """Read a weight (number followed by ``kg`` or ``g``) from an image via OCR.

    The image is loaded with OpenCV, converted to grayscale and handed to
    Tesseract. The recognised text is lower-cased, newlines are turned into
    spaces, and the first ``<number><optional spaces><unit>`` occurrence is
    reported.

    Args:
        img_path: Path of the image file to read.

    Returns:
        ``"<number> <unit>"`` (for example ``"75.8 kg"``) for the first match,
        or ``"Weight not detected"`` when no path was supplied, the image
        could not be loaded, or the OCR text contains no weight.
    """
    # No path supplied (None or empty string): nothing to read, so report the
    # usual "not found" result instead of failing inside OpenCV.
    if not img_path:
        return "Weight not detected"

    img = cv2.imread(img_path)
    # cv2.imread does not raise for a missing or undecodable file; it returns
    # None, which would otherwise crash cvtColor with an opaque assertion.
    if img is None:
        return "Weight not detected"

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # OCR; "--psm 6" selects Tesseract's page segmentation mode 6.
    text = pytesseract.image_to_string(gray, config='--psm 6')

    # Normalise: lower-case so the unit match is case-insensitive, and flatten
    # line breaks so a number and its unit on adjacent lines can still match.
    text = text.lower().replace('\n', ' ').strip()

    # Pattern for values like "52.25 g" or "75.8 kg": a decimal or integer,
    # optional whitespace, then the unit.
    match = re.search(r'(\d+\.\d+|\d+)\s*(kg|g)', text)
    if match is None:
        return "Weight not detected"

    number, unit = match.group(1), match.group(2)
    return f"{number} {unit}"