Sanjayraju30 committed on
Commit
17d218a
·
verified ·
1 Parent(s): 357e812

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +12 -4
ocr_engine.py CHANGED
@@ -1,17 +1,25 @@
1
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
2
  from PIL import Image
3
  import torch
 
4
 
5
- # Load the TrOCR processor and model once, at import time; extract_weight() reuses these module-level objects.
6
  # Processor for the "microsoft/trocr-base-handwritten" checkpoint: builds pixel_values and decodes generated ids.
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
7
  # Encoder-decoder model loaded from the same checkpoint: generates the token ids that the processor decodes.
  model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
8
 
9
  def extract_weight(image):
10
  try:
11
- # Resize or preprocess if needed
12
  pixel_values = processor(images=image, return_tensors="pt").pixel_values
13
  generated_ids = model.generate(pixel_values)
14
- text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
15
- return text.strip()
 
 
 
 
 
 
 
16
  except Exception as e:
17
  return f"Error: {str(e)}"
 
1
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
2
  from PIL import Image
3
  import torch
4
+ import re
5
 
6
+ # Load the TrOCR processor and model once, at import time; extract_weight() reuses these module-level objects.
7
  # Processor for the "microsoft/trocr-base-handwritten" checkpoint: builds pixel_values and decodes generated ids.
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
8
  # Encoder-decoder model loaded from the same checkpoint: generates the token ids that the processor decodes.
  model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
9
 
10
  def extract_weight(image):
11
  try:
12
+ # OCR inference
13
  pixel_values = processor(images=image, return_tensors="pt").pixel_values
14
  generated_ids = model.generate(pixel_values)
15
+ text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
16
+ print("OCR Output:", text)
17
+
18
+ # Extract valid float or integer from OCR result
19
+ match = re.search(r'\d{2,5}(\.\d{1,2})?', text) # e.g., 65 or 5325.0
20
+ if match:
21
+ return match.group() + " kg"
22
+ else:
23
+ return "No valid weight found"
24
  except Exception as e:
25
  return f"Error: {str(e)}"