Upload 10 files
Browse files- Dockerfile +37 -0
- app.py +6 -0
- app/__init__.py +9 -0
- app/config.py +6 -0
- app/routes.py +117 -0
- app/utils.py +44 -0
- docker-compose.yml +21 -0
- render.yaml +13 -0
- requirements.txt +8 -0
- runtime.txt +1 -0
Dockerfile
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV DEBIAN_FRONTEND=noninteractive
ENV MODEL_PATH=RufusRubin777/GOT-OCR2_0_CPU

WORKDIR /app

# Install base build dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy and install the Python requirements
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create and expose the Hugging Face cache directory
RUN mkdir -p /root/.cache/huggingface
VOLUME /root/.cache/huggingface

# Pre-download the model at build time so the first request is fast.
# Fixed: the original imported from `transformers_modules.*`, which is a
# cache that transformers creates at *runtime* under trust_remote_code and
# does not exist during the image build. Use the public Auto* API instead
# (this also matches how app/utils.py loads the model).
RUN python -c "from transformers import AutoModel, AutoTokenizer; \
    model_path='RufusRubin777/GOT-OCR2_0_CPU'; \
    AutoTokenizer.from_pretrained(model_path, trust_remote_code=True); \
    AutoModel.from_pretrained(model_path, trust_remote_code=True)"

# Copy the application code
COPY . .

EXPOSE 7863

CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from app import create_app

# Module-level WSGI application object (also what gunicorn imports as app:app).
app = create_app()

if __name__ == '__main__':
    # Development entry point: bind every interface on port 7863.
    app.run(host='0.0.0.0', port=7863)
|
app/__init__.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask


def create_app():
    """Application factory: build the Flask app and register its blueprints."""
    application = Flask(__name__)

    # Imported here (not at module top) to avoid a circular import:
    # app.routes imports from the app package.
    from app.routes import main as main_blueprint
    application.register_blueprint(main_blueprint)

    return application
|
app/config.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os


class Config:
    """Central application configuration."""

    # Session-signing key; the placeholder is used when the environment
    # variable is unset *or* empty (hence `or` rather than a .get default).
    SECRET_KEY = os.environ.get('SECRET_KEY') or 'your-secret-key'
    # Directory where uploaded files are stored.
    UPLOAD_FOLDER = 'instance/uploads'
    # Reject request bodies larger than 16 MiB.
    MAX_CONTENT_LENGTH = 16 * 1024 ** 2
|
app/routes.py
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Blueprint, jsonify, request
import io
from app.utils import OCRModel

main = Blueprint('main', __name__)
# Loaded once at import time; OCRModel caches its single instance internally.
ocr_model = OCRModel()

# Allowed upload file extensions
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}

# Known allergens mapped to the ingredient keywords that indicate them
# (can be customised as needed)
KNOWN_ALLERGENS = {
    'gluten': ['wheat', 'barley', 'gluten'],
    'dairy': ['milk', 'yogurt', 'cheese', 'lactose'],
    'nuts': ['nuts', 'peanuts', 'almonds', 'walnuts'],
    'eggs': ['eggs'],
    'soy': ['soy'],
    'fish': ['fish'],
    # Fixed: the original listed 'shrimp' twice; also cover other common
    # shellfish terms.
    'shellfish': ['oyster', 'shrimp', 'crab', 'lobster'],
}
|
21 |
+
|
22 |
+
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
|
25 |
+
|
26 |
+
def find_allergens(text, user_allergens):
    """Search *text* for each allergen requested in *user_allergens*.

    Matching is case-insensitive substring search. Allergens present in
    KNOWN_ALLERGENS are checked against every known keyword variant;
    anything else is searched for literally.

    Args:
        text: the OCR-extracted text to scan.
        user_allergens: iterable of allergen names (may contain whitespace).

    Returns:
        (found_allergens, allergen_details): the set of matched allergen
        names, and a dict mapping each match to the keyword that hit
        (the last matching variant wins, as in the original).
    """
    text = text.lower()
    found_allergens = set()
    allergen_details = {}

    for allergen in user_allergens:
        allergen = allergen.strip().lower()
        # Fixed: skip empty entries (e.g. from trailing commas in the
        # request). '' is a substring of every text, so an empty entry
        # was always reported as a found "allergen".
        if not allergen:
            continue
        # Check the known keyword variants for this allergen
        if allergen in KNOWN_ALLERGENS:
            for variant in KNOWN_ALLERGENS[allergen]:
                if variant.lower() in text:
                    found_allergens.add(allergen)
                    allergen_details[allergen] = variant
        # Otherwise fall back to a direct search for the entered term
        elif allergen in text:
            found_allergens.add(allergen)
            allergen_details[allergen] = allergen

    return found_allergens, allergen_details
|
46 |
+
|
47 |
+
@main.route('/')
def index():
    """Service root: describe the available endpoints and supported inputs."""
    info = {
        "message": "Welcome to the Text Recognition and Sensitivity Checking Service",
        "endpoints": {
            "/api/ocr": "POST - Image analysis and sensitivity testing",
        },
        "supported_formats": list(ALLOWED_EXTENSIONS),
        "known_allergens": list(KNOWN_ALLERGENS.keys()),
    }
    return jsonify(info)
|
57 |
+
|
58 |
+
@main.route('/api/ocr', methods=['POST'])
def process_image():
    """Run OCR on an uploaded image and scan the text for allergens.

    Expects multipart/form-data with:
        file:      an image (png/jpg/jpeg)
        allergens: comma-separated allergen names

    Returns:
        JSON with the extracted text and the allergen analysis on success,
        a 400 payload for invalid input, or 500 if OCR/analysis fails.
    """
    # Validate that a file was uploaded
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    # Validate that an allergen list was provided
    if 'allergens' not in request.form:
        return jsonify({"error": "Sensitivities not specified"}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400

    # Validate the file extension
    if not allowed_file(file.filename):
        return jsonify({
            "error": "File type not supported",
            "supported_formats": list(ALLOWED_EXTENSIONS)
        }), 400

    # Build the allergen list. Fixed: drop empty entries produced by stray
    # or trailing commas — an empty term is a substring of any text and
    # would always be reported as a match.
    user_allergens = [a.strip() for a in request.form['allergens'].split(',') if a.strip()]

    try:
        # Read the image fully into memory
        file_bytes = file.read()
        file_stream = io.BytesIO(file_bytes)

        # Run OCR on the uploaded image
        extracted_text = ocr_model.process_image(file_stream)

        # Scan the extracted text for the requested allergens
        found_allergens, allergen_details = find_allergens(extracted_text, user_allergens)

        # Build the response payload
        response = {
            "success": True,
            "extracted_text": extracted_text,
            "analysis": {
                "found_allergens": list(found_allergens),
                "allergen_details": allergen_details,
                "has_allergens": len(found_allergens) > 0,
                "warning": "Warning: Allergens found!" if found_allergens else "No allergens found"
            }
        }

        return jsonify(response)

    except Exception as e:
        # Top-level boundary: report the failure as JSON instead of crashing.
        return jsonify({
            "error": "An error occurred while processing the image.",
            "details": str(e)
        }), 500
|
112 |
+
|
113 |
+
@main.route('/api/allergens', methods=['GET'])
def get_known_allergens():
    """Return the built-in allergen -> keyword-variants map."""
    return jsonify(allergens=KNOWN_ALLERGENS)
|
app/utils.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import torch
from transformers import AutoModel, AutoTokenizer


class OCRModel:
    """Singleton wrapper around the GOT-OCR2 model.

    The tokenizer and model are loaded once, on the first OCRModel()
    construction, and every later call returns the same instance.
    """

    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(OCRModel, cls).__new__(cls)
            cls._instance.initialize()
        return cls._instance

    def initialize(self):
        """Load the tokenizer and model once and keep them on the instance."""
        model_path = os.getenv('MODEL_PATH', 'RufusRubin777/GOT-OCR2_0_CPU')

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True,
            local_files_only=False  # download the files when not already cached
        )

        self.model = AutoModel.from_pretrained(
            model_path,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            device_map='cpu',  # CPU-only deployment
            use_safetensors=True,
            pad_token_id=self.tokenizer.eos_token_id
        )

        self.model = self.model.eval()

    def process_image(self, image_stream):
        """OCR a single image.

        Args:
            image_stream: binary file-like object containing the image bytes.

        Returns:
            The recognized text, or an "Error processing image: ..." string
            on failure (best-effort contract preserved from the original).
        """
        # Fixed: `Image` and `torch` were used without ever being imported,
        # so every call raised NameError, which the except below silently
        # turned into an error string. torch is imported at module level;
        # Pillow is imported lazily here to keep module import light.
        from PIL import Image

        try:
            # Open the image from the in-memory stream
            image = Image.open(image_stream)

            with torch.no_grad():
                result = self.model.chat(self.tokenizer, image, ocr_type='format')
            return result
        except Exception as e:
            return f"Error processing image: {str(e)}"
|
docker-compose.yml
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Compose file for the OCR service.
# Fixed: the original declared `version: '3.10'`, which is not a valid
# Compose file version (the 3.x schema tops out at 3.9).
version: '3.9'

services:
  app:
    build: .
    ports:
      - "7863:7863"
    volumes:
      # Persist the Hugging Face model cache across container rebuilds
      - huggingface_cache:/root/.cache/huggingface
    environment:
      - MODEL_PATH=RufusRubin777/GOT-OCR2_0_CPU
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 2G

volumes:
  huggingface_cache:
|
render.yaml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
services:
  - type: web
    name: my-app
    env: python
    repo: https://github.com/ZienabMakhloof/ocr.git
    branch: main
    buildCommand: "pip install -r requirements.txt"
    # Fixed: Render routes traffic to the port given in the PORT env var;
    # `python app.py` binds a hard-coded 7863 and the service would be
    # unreachable. Use gunicorn (already in requirements.txt) bound to $PORT.
    startCommand: "gunicorn -b 0.0.0.0:$PORT app:app"
    envVars:
      - key: PYTHON_VERSION
        value: 3.10.11
    region: oregon
    plan: standard
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==1.1.1
Flask==3.1.0
torch==2.5.1
torchvision==0.20.1
transformers==4.37.2
tiktoken==0.6.0
verovio==4.3.1
gunicorn
# Fixed: Pillow was missing although app/utils.py opens uploads with PIL.Image
Pillow
|
runtime.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python-3.10.11
|