Spaces:

mike23415
/

ORC

Running

App Files Files Community

mike23415 committed on 27 days ago

Commit

40c3856

verified ·

1 Parent(s): b95ef79

Create app.py

Browse files

Files changed (1) hide show

app.py +129 -0

app.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import os
+import uuid
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from werkzeug.utils import secure_filename
+import torch
+from transformers import DonutProcessor, VisionEncoderDecoderModel
+from PIL import Image
+import cv2
+import numpy as np
+app = Flask(__name__)  # application object; the routes below are registered on it
+CORS(app)  # apply flask_cors to the app with its default options
+# Upload settings: destination folder and the file extensions accepted by allowed_file()
+UPLOAD_FOLDER = 'uploads'
+ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'pdf', 'tif', 'tiff'}  # NOTE(review): 'pdf' is accepted, but uploads are opened with PIL's Image.open - confirm PDFs are readable
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max upload
+# Create the uploads directory at import time; exist_ok=True makes this a no-op when it already exists
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+# Load the Donut processor and model ("microsoft/donut-base") once at import time; perform_ocr() reuses them
+processor = DonutProcessor.from_pretrained("microsoft/donut-base")
+model = VisionEncoderDecoderModel.from_pretrained("microsoft/donut-base")
+# Use CUDA when torch reports it available, otherwise CPU; perform_ocr() moves its tensors to the same device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+def allowed_file(filename):
+    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.
+
+    The extension is the text after the last dot, compared case-insensitively.
+    A name without any dot is rejected.
+    """
+    # Without a dot there is no extension to inspect.
+    if '.' not in filename:
+        return False
+    extension = filename.rsplit('.', 1)[1]
+    return extension.lower() in ALLOWED_EXTENSIONS
+def preprocess_image(image_path):
+    """Load an image from disk and binarize it for OCR.
+
+    The image is read as RGB, reduced to grayscale, and passed through
+    Gaussian adaptive thresholding (block size 11, constant 2) so that
+    uneven lighting has less influence on the result.
+
+    Args:
+        image_path: Path of the image file to read.
+
+    Returns:
+        A PIL image in RGB mode holding the thresholded (black/white) picture.
+
+    Raises:
+        Any exception raised by ``Image.open`` or OpenCV propagates to the
+        caller (for example when the file is not a readable image).
+    """
+    # Open inside a context manager so the file handle is released right away
+    # instead of lingering until garbage collection; convert() yields a fully
+    # loaded copy, so the pixel data stays valid after the file is closed.
+    with Image.open(image_path) as source:
+        rgb_pixels = np.array(source.convert("RGB"))
+    # Go straight from RGB to grayscale; the intermediate BGR copy is not needed.
+    gray = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
+    # Apply adaptive thresholding to handle varying lighting conditions
+    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                   cv2.THRESH_BINARY, 11, 2)
+    # Convert back to a 3-channel PIL image
+    return Image.fromarray(cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB))
+def perform_ocr(image_path):
+    """Run the Donut model on the image at *image_path* and return its text.
+
+    The image is first cleaned up by preprocess_image(), then encoded by the
+    module-level ``processor`` and decoded with beam search by ``model``.
+    Special tokens are stripped from the decoded string before it is returned.
+    """
+    tokenizer = processor.tokenizer
+    prepared = preprocess_image(image_path)
+    # Encode the picture into model inputs on the active device
+    encoded = processor(prepared, return_tensors="pt")
+    pixel_values = encoded.pixel_values.to(device)
+    # Seed the decoder with the task prompt
+    task_prompt = "<s_ocr>"
+    prompt_encoding = tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt")
+    decoder_input_ids = prompt_encoding.input_ids.to(device)
+    generation_options = {
+        'decoder_input_ids': decoder_input_ids,
+        'max_length': model.decoder.config.max_position_embeddings,
+        'early_stopping': True,
+        'pad_token_id': tokenizer.pad_token_id,
+        'eos_token_id': tokenizer.eos_token_id,
+        'use_cache': True,
+        'num_beams': 5,
+        # Never emit the unknown token
+        'bad_words_ids': [[tokenizer.unk_token_id]],
+        'return_dict_in_generate': True,
+    }
+    outputs = model.generate(pixel_values, **generation_options)
+    # Decode the first (only) sequence and drop special markers, in this order
+    text = processor.batch_decode(outputs.sequences)[0]
+    markers = (tokenizer.eos_token, tokenizer.pad_token,
+               "<s>", "</s>", "<s_ocr>", "</s_ocr>")
+    for marker in markers:
+        text = text.replace(marker, "")
+    return text.strip()
+@app.route('/ocr', methods=['POST'])
+def ocr():
+    """Handle an OCR request: accept one uploaded file and return its text.
+
+    Expects a multipart form upload under the field name ``file``.
+
+    Returns:
+        200 with ``{'success': True, 'text': ...}`` when OCR succeeds.
+        400 with ``{'error': ...}`` when the file part is missing, the
+        filename is empty, or the extension is not allowed.
+        500 with ``{'success': False, 'error': ...}`` when saving the upload
+        or running OCR raises.
+    """
+    # Check if a file was uploaded
+    if 'file' not in request.files:
+        return jsonify({'error': 'No file part'}), 400
+    file = request.files['file']
+    # Check if filename is empty
+    if file.filename == '':
+        return jsonify({'error': 'No selected file'}), 400
+    # Check if file type is allowed
+    if not file or not allowed_file(file.filename):
+        return jsonify({'error': 'File type not allowed'}), 400
+    # Prefix with a UUID so concurrent uploads of the same name cannot collide
+    filename = str(uuid.uuid4()) + '_' + secure_filename(file.filename)
+    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+    try:
+        # Saving is inside the try so a write failure also yields the JSON
+        # error response and still reaches the cleanup below.
+        file.save(file_path)
+        extracted_text = perform_ocr(file_path)
+    except Exception as e:
+        # Boundary handler: record the traceback server-side, report to client
+        app.logger.exception("OCR failed for upload %s", filename)
+        return jsonify({
+            'success': False,
+            'error': str(e)
+        }), 500
+    finally:
+        # The upload is only needed for the duration of this request; remove
+        # it so user files do not pile up in the uploads folder.
+        try:
+            os.remove(file_path)
+        except FileNotFoundError:
+            # Nothing was written (save failed before creating the file)
+            pass
+        except OSError:
+            app.logger.warning("Could not remove temporary upload %s", file_path)
+    return jsonify({
+        'success': True,
+        'text': extracted_text
+    })
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Liveness endpoint: always answers 200 with a fixed JSON status."""
+    payload = {'status': 'healthy'}
+    return jsonify(payload), 200
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=5000, debug=False