Spaces:
Runtime error
Runtime error
Update ocr_engine.py
Browse files
- ocr_engine.py +2 -2
ocr_engine.py
CHANGED
@@ -3,14 +3,14 @@ from PIL import Image
|
|
3 |
import torch
|
4 |
import re
|
5 |
|
6 |
-
# Load TrOCR once
|
7 |
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
|
8 |
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
|
9 |
|
10 |
def clean_ocr_text(text):
|
11 |
print("[RAW OCR]", text)
|
12 |
text = text.replace(",", ".").replace("s", "5").replace("o", "0").lower()
|
13 |
-
text = re.sub(r"[^\d\.kg]", "", text) # Keep
|
14 |
print("[CLEANED OCR]", text)
|
15 |
return text
|
16 |
|
|
|
3 |
import torch
|
4 |
import re
|
5 |
|
6 |
+
# Load TrOCR model and processor once
|
7 |
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
|
8 |
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
|
9 |
|
10 |
def clean_ocr_text(text):
    """Normalize raw OCR output to digits, dots and the letters ``k``/``g``.

    The text is lowercased, commas become dots, and the look-alike letters
    ``s`` and ``o`` are mapped to ``5`` and ``0``. Every character that is not
    a digit, a dot, ``k`` or ``g`` is then removed. The raw and cleaned
    strings are printed for debugging.

    Args:
        text: Raw string produced by the OCR step.

    Returns:
        The cleaned, lowercase string (possibly empty).
    """
    print("[RAW OCR]", text)
    # Lowercase BEFORE substituting look-alikes: otherwise an uppercase "S" or
    # "O" skips the replacement, is lowercased afterwards, and is then dropped
    # by the filter below instead of becoming "5" or "0".
    text = text.lower().replace(",", ".").replace("s", "5").replace("o", "0")
    text = re.sub(r"[^\d\.kg]", "", text)  # Keep digits, dot, kg
    print("[CLEANED OCR]", text)
    return text
|
16 |
|