Sanjayraju30 committed on
Commit
1dbcf19
·
verified ·
1 Parent(s): d5ceb6c

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +23 -14
ocr_engine.py CHANGED
@@ -1,19 +1,28 @@
 
1
  from PIL import Image
2
- import pytesseract
3
  import re
4
 
5
- def extract_weight(img_path):
6
- img = Image.open(img_path).convert("L") # Grayscale
 
7
 
8
- # OCR
9
- text = pytesseract.image_to_string(img, config='--psm 6')
10
- text = text.lower().replace('\n', ' ').strip()
 
 
 
 
11
 
12
- # Find weight + unit (e.g., 52.25 g, 75.8 kg)
13
- match = re.search(r'(\d+\.\d+|\d+)\s*(kg|g)', text)
14
- if match:
15
- number = match.group(1)
16
- unit = match.group(2)
17
- return f"{number} {unit}"
18
- else:
19
- return "Weight not detected"
 
 
 
 
1
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
2
  from PIL import Image
3
+ import torch
4
  import re
5
 
6
+ # Load TrOCR model and processor once
7
+ processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
8
+ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
9
 
10
# Matches a number (1-5 integer digits, optional 1-3 decimals) followed by a
# "kg" or "g" unit.  The two lookbehinds stop a match from starting in the
# middle of a longer number, so "1.2345 g" or "123456 g" is rejected instead
# of being misread as "2345 g" / "23456 g".
_WEIGHT_PATTERN = re.compile(r'(?<!\d)(?<!\d\.)(\d{1,5}(?:\.\d{1,3})?)\s*(kg|g)')


def _parse_weight(text):
    """Return ``"<value> <unit>"`` for the first weight in *text*, or ``None``."""
    match = _WEIGHT_PATTERN.search(text.lower())
    if match is None:
        return None
    return f"{match.group(1)} {match.group(2)}"


def extract_weight(image):
    """Run TrOCR on *image* and return the first weight reading found.

    Args:
        image: The picture to read. PIL images are converted to RGB when
            needed; any other input the module-level ``processor`` accepts
            is passed through unchanged.

    Returns:
        ``"<value> <unit>"`` (unit is ``kg`` or ``g``) when a weight is
        recognised, ``"No valid weight found"`` when the OCR text holds no
        weight, or ``"Error: <reason>"`` when no image was supplied or the
        OCR step raised. This function never raises; callers get a string
        in every case.
    """
    if image is None:
        # Report the empty input explicitly rather than surfacing whatever
        # exception the processor would raise for it.
        return "Error: no image provided"

    try:
        # The TrOCR usage examples feed RGB images; grayscale, RGBA or
        # palette PIL images are converted so the processor sees 3 channels.
        if isinstance(image, Image.Image) and image.mode != "RGB":
            image = image.convert("RGB")

        # OCR inference using the processor/model loaded once at import time.
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        print("OCR Output:", text)

        weight = _parse_weight(text)
        return weight if weight is not None else "No valid weight found"
    except Exception as e:
        # Deliberate catch-all: failures are reported to the caller as text.
        return f"Error: {e}"