Update app/utils.py
app/utils.py  (+34 -30)
@@ -1,6 +1,6 @@
 # utils.py
 import os
-from transformers import
+from transformers import AutoModel, AutoTokenizer
 from PIL import Image, ImageEnhance, ImageFilter
 import torch
 import logging
@@ -20,16 +20,20 @@ class OCRModel:
         try:
             logger.info("Initializing OCR model...")

-            # Initialize the model
-            self.
-            self.model =
+            # Initialize the model and tokenizer
+            self.tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+            self.model = AutoModel.from_pretrained(
+                'ucaslcl/GOT-OCR2_0',
+                trust_remote_code=True,
+                low_cpu_mem_usage=True,
+                device_map='cpu',
+                use_safetensors=True,
+                pad_token_id=self.tokenizer.eos_token_id
+            )

-            # Select the device
-            self.device = "
-
-
-            self.model.to(self.device)
-            self.model.eval()
+            # Set the device and put the model in evaluation mode
+            self.device = "cpu"  # this model is designed for CPU
+            self.model = self.model.eval().cpu()

             logger.info("Model initialization completed successfully")

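Note on the new initialization: trust_remote_code=True downloads and executes the model's own Python code from the Hub, and the weights are pinned to CPU. The snippet below is only a standalone sanity check of this loading pattern, mirroring the GOT-OCR2_0 model card usage; 'sample.jpg' is a placeholder path, and it assumes the remote code runs on CPU, which is exactly what this commit relies on.

    # Standalone sanity check of the initialization above (not part of the commit).
    # "sample.jpg" is a placeholder path; ocr_type='ocr' returns plain text,
    # while ocr_type='format' (used in the commit) returns formatted output.
    from transformers import AutoModel, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
    model = AutoModel.from_pretrained(
        'ucaslcl/GOT-OCR2_0',
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map='cpu',
        use_safetensors=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    model = model.eval().cpu()

    print(model.chat(tokenizer, 'sample.jpg', ocr_type='format'))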
@@ -68,33 +72,33 @@ class OCRModel:
         try:
             logger.info("Starting image processing")

+            # Save the image to a temporary file because the model requires a file path
+            temp_image_path = "temp_image.jpg"
+
             # Reset the BytesIO stream to the beginning
             image_stream.seek(0)

-            # Open the image
+            # Open the image and save it temporarily
             image = Image.open(image_stream).convert('RGB')
-
-            # Apply preprocessing
             processed_image = self.preprocess_image(image)
+            processed_image.save(temp_image_path)

-            #
-
-
-
-            with torch.no_grad():
-                generated_ids = self.model.generate(
-                    pixel_values,
-                    max_length=128,
-                    num_beams=4,
-                    length_penalty=2.0,
-                    early_stopping=True
-                )
+            # Use the model to recognize the text
+            try:
+                result = self.model.chat(self.tokenizer, temp_image_path, ocr_type='format')
+                logger.info(f"Successfully extracted text: {result[:100]}...")

-
-
-
-
-
+                # Delete the temporary file
+                if os.path.exists(temp_image_path):
+                    os.remove(temp_image_path)
+
+                return result.strip()
+
+            except Exception as e:
+                logger.error(f"Error in OCR processing: {str(e)}", exc_info=True)
+                if os.path.exists(temp_image_path):
+                    os.remove(temp_image_path)
+                raise

         except Exception as e:
             logger.error(f"Error in image processing: {str(e)}", exc_info=True)
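The new processing path goes through a fixed file, temp_image.jpg, in the current working directory, because model.chat() takes a file path rather than a PIL image. If the app ever serves concurrent requests, a per-request temporary file is safer. The variant below is illustrative only and not part of this commit; run_ocr is a made-up helper name, and it uses tempfile plus try/finally so cleanup happens even when OCR fails.

    # Illustrative alternative to the fixed "temp_image.jpg" path (not in the commit).
    import os
    import tempfile

    def run_ocr(model, tokenizer, processed_image):
        # Hypothetical helper: save the preprocessed PIL image to a unique temp file,
        # run GOT-OCR2_0 on it, and always clean up afterwards.
        fd, temp_image_path = tempfile.mkstemp(suffix=".jpg")
        os.close(fd)  # only the path is needed; PIL reopens the file on save()
        try:
            processed_image.save(temp_image_path)
            result = model.chat(tokenizer, temp_image_path, ocr_type='format')
            return result.strip()
        finally:
            if os.path.exists(temp_image_path):
                os.remove(temp_image_path)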
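Caller-side usage, for reference: the processing method in the last hunk reads from a BytesIO stream (image_stream), so the surrounding app presumably wraps uploaded bytes before calling it. A minimal sketch, assuming the method is exposed as process_image; the actual method name is not visible in this diff.

    # Hypothetical caller sketch; OCRModel's public method name is assumed here.
    from io import BytesIO
    from app.utils import OCRModel

    ocr = OCRModel()
    with open("scan.jpg", "rb") as f:                 # "scan.jpg" is a placeholder upload
        text = ocr.process_image(BytesIO(f.read()))   # method name assumed, not shown in diff
    print(text)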