Sanjayraju30 committed on
Commit
5607fce
·
verified ·
1 Parent(s): c042a27

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +10 -5
ocr_engine.py CHANGED
@@ -3,14 +3,14 @@ from PIL import Image
3
  import torch
4
  import re
5
 
6
- # Load TrOCR model and processor once
7
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
8
  model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
9
 
10
  def clean_ocr_text(text):
11
  print("[RAW OCR]", text)
12
  text = text.replace(",", ".").replace("s", "5").replace("o", "0").lower()
13
- text = re.sub(r"[^\d\.kg]", "", text) # Keep digits, dot, kg
14
  print("[CLEANED OCR]", text)
15
  return text
16
 
@@ -22,11 +22,16 @@ def extract_weight(image):
22
 
23
  cleaned = clean_ocr_text(raw_text)
24
 
25
- # Regex for weight
26
  match = re.search(r'(\d{1,5}(?:\.\d{1,3})?)\s*(kg|g)', cleaned)
27
  if match:
28
  return f"{match.group(1)} {match.group(2)}"
29
- else:
30
- return f"No valid weight found | OCR: {cleaned}"
 
 
 
 
 
31
  except Exception as e:
32
  return f"Error: {str(e)}"
 
3
  import torch
4
  import re
5
 
6
+ # Load the TrOCR handwritten model and its processor once, at import time
7
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
8
  model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
9
 
10
  def clean_ocr_text(text):
11
  print("[RAW OCR]", text)
12
  text = text.replace(",", ".").replace("s", "5").replace("o", "0").lower()
13
+ text = re.sub(r"[^\d\.kg]", "", text)
14
  print("[CLEANED OCR]", text)
15
  return text
16
 
 
22
 
23
  cleaned = clean_ocr_text(raw_text)
24
 
25
+ # First try with unit
26
  match = re.search(r'(\d{1,5}(?:\.\d{1,3})?)\s*(kg|g)', cleaned)
27
  if match:
28
  return f"{match.group(1)} {match.group(2)}"
29
+
30
+ # Fallback: only number, assume grams
31
+ fallback = re.search(r'(\d{1,5}(?:\.\d{1,3})?)', cleaned)
32
+ if fallback:
33
+ return f"{fallback.group(1)} g"
34
+
35
+ return f"No valid weight found | OCR: {cleaned}"
36
  except Exception as e:
37
  return f"Error: {str(e)}"