Sanjayraju30 committed on
Commit
c3e8195
·
verified ·
1 Parent(s): 7ebef67

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +2 -2
ocr_engine.py CHANGED
@@ -3,14 +3,14 @@ from PIL import Image
3
  import torch
4
  import re
5
 
6
# Load the TrOCR processor and model a single time, when the module is imported.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
9
 
def clean_ocr_text(text: str) -> str:
    """Normalize raw OCR output down to digits, dots and the letters k/g.

    The text is lowercased first, then commas become dots and the letters
    ``s``/``o`` are mapped to ``5``/``0``. Finally every character that is
    not a digit, a dot, ``k`` or ``g`` is removed. The raw and cleaned
    strings are printed for debugging.

    Args:
        text: Raw string produced by the OCR step.

    Returns:
        The cleaned string (possibly empty).
    """
    print("[RAW OCR]", text)
    # Lowercase BEFORE substituting so uppercase "S"/"O" are mapped to
    # 5/0 as well; previously they were lowercased afterwards and then
    # dropped by the regex below, silently losing digits.
    substitutions = str.maketrans({",": ".", "s": "5", "o": "0"})
    text = text.lower().translate(substitutions)
    text = re.sub(r"[^\d\.kg]", "", text)  # Keep only digits, dot, k, g
    print("[CLEANED OCR]", text)
    return text
16
 
 
3
  import torch
4
  import re
5
 
6
# Load the TrOCR model and processor a single time, when the module is imported.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
9
 
def clean_ocr_text(text: str) -> str:
    """Reduce raw OCR output to digits, dots and the letters k/g.

    Processing order: lowercase the text, turn commas into dots, map the
    letters ``s`` and ``o`` to ``5`` and ``0``, then strip every character
    that is not a digit, a dot, ``k`` or ``g``. Both the raw input and the
    cleaned result are printed for debugging.

    Args:
        text: Raw string produced by the OCR step.

    Returns:
        The cleaned string (possibly empty).
    """
    print("[RAW OCR]", text)
    # Case-fold first: with the old order (replace, then lower) an
    # uppercase "S" or "O" skipped the digit mapping and was later
    # removed by the regex, so "1O" came out as "1" instead of "10".
    lowered = text.lower()
    char_map = str.maketrans({",": ".", "s": "5", "o": "0"})
    text = lowered.translate(char_map)
    text = re.sub(r"[^\d\.kg]", "", text)  # Keep digits, dot, kg
    print("[CLEANED OCR]", text)
    return text
16