Spaces:

mike23415
/

ORC

Running

App Files Files Community

ORC / app.py

mike23415

Update app.py

c72429b verified about 1 month ago

raw

history blame

3.9 kB

	import os
	import uuid
	from flask import Flask, request, jsonify
	from flask_cors import CORS
	from werkzeug.utils import secure_filename
	import torch
	from transformers import TrOCRProcessor, VisionEncoderDecoderModel
	from PIL import Image
	import cv2
	import numpy as np

	# Flask application with CORS enabled so browser clients on other origins
	# can call the API.
	app = Flask(__name__)
	CORS(app)

	# Upload handling configuration.
	UPLOAD_FOLDER = 'uploads'
	# Only formats the Pillow-based preprocessing step can decode. 'pdf' used to
	# be listed here, but Pillow can write PDFs and cannot read them, so every
	# PDF upload passed validation and then failed during OCR with a 500.
	ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'tif', 'tiff'}
	app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
	app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max upload

	# Uploaded files are written here temporarily while they are processed.
	os.makedirs(UPLOAD_FOLDER, exist_ok=True)

	# Load the OCR model (Microsoft TrOCR, handwritten variant) once at import
	# time so that requests do not pay the loading cost.
	print("Loading OCR model...")
	processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten", cache_dir="/huggingface_cache")
	model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten", cache_dir="/huggingface_cache")

	# Run on the GPU when one is available, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	print(f"Using device: {device}")
	model.to(device)

	def allowed_file(filename):
	    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

	    The extension is the text after the last dot, compared case-insensitively.
	    A name without any dot is rejected.
	    """
	    _, dot, extension = filename.rpartition('.')
	    # rpartition yields an empty separator when the name contains no dot.
	    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

	def preprocess_image(image_path):
	    """Load an image from disk and binarize it for OCR.

	    Args:
	        image_path: Path of the image file to read.

	    Returns:
	        A PIL RGB image holding the adaptively thresholded (black/white)
	        version of the input.

	    Raises:
	        Whatever ``Image.open`` raises when the file is missing or is not a
	        decodable image.
	    """
	    # Image.open is lazy and keeps the file handle open. The context manager
	    # releases it as soon as the pixels have been copied out, so the caller
	    # can delete the file right away.
	    with Image.open(image_path) as source:
	        rgb = np.array(source.convert("RGB"))

	    # Go straight from RGB to grayscale; the previous RGB -> BGR -> GRAY
	    # round trip produced the same values with an extra copy.
	    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

	    # Adaptive thresholding copes with uneven lighting across the page.
	    thresh = cv2.adaptiveThreshold(
	        gray,
	        255,
	        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	        cv2.THRESH_BINARY,
	        11,
	        2,
	    )

	    # The OCR processor is fed a PIL RGB image, so expand back to 3 channels.
	    enhanced_image = Image.fromarray(cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB))
	    return enhanced_image

	def perform_ocr(image_path):
	    """Run the TrOCR model on the image at *image_path* and return its text.

	    The image is first binarized by ``preprocess_image``, then encoded by the
	    module-level ``processor`` and decoded with beam search by ``model``.
	    The returned string has surrounding whitespace stripped.
	    """
	    prepared = preprocess_image(image_path)

	    # Encode the image as a tensor on the same device as the model.
	    encoded = processor(prepared, return_tensors="pt")
	    inputs = encoded.pixel_values.to(device)

	    # Beam search, capped at 64 output tokens.
	    generation_options = {
	        'max_length': 64,
	        'num_beams': 5,
	        'early_stopping': True,
	    }
	    token_ids = model.generate(inputs, **generation_options)

	    # Only one image was submitted, so take the first decoded sequence.
	    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
	    return decoded[0].strip()

	@app.route('/ocr', methods=['POST'])
	def ocr():
	    """Handle an OCR request for a single uploaded image.

	    Expects a multipart form upload under the field name ``file``.

	    Returns:
	        * 200 ``{'success': True, 'text': ...}`` when OCR succeeds.
	        * 400 ``{'error': ...}`` when the file part is missing, the filename
	          is empty, or the extension is not allowed.
	        * 500 ``{'success': False, 'error': ...}`` when saving or processing
	          the upload raises.
	    """
	    # Check if a file was uploaded
	    if 'file' not in request.files:
	        return jsonify({'error': 'No file part'}), 400

	    file = request.files['file']

	    # Check if filename is empty
	    if file.filename == '':
	        return jsonify({'error': 'No selected file'}), 400

	    # Check if file type is allowed
	    if not (file and allowed_file(file.filename)):
	        return jsonify({'error': 'File type not allowed'}), 400

	    # A UUID prefix keeps concurrent uploads with the same name from colliding.
	    filename = str(uuid.uuid4()) + '_' + secure_filename(file.filename)
	    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

	    try:
	        file.save(file_path)
	        extracted_text = perform_ocr(file_path)
	    except Exception as e:
	        # Log the error
	        print(f"Error processing image: {str(e)}")
	        return jsonify({
	            'success': False,
	            'error': str(e)
	        }), 500
	    finally:
	        # Always remove the temporary upload. Previously it was deleted only
	        # on success, so every failed request left a file behind.
	        try:
	            os.remove(file_path)
	        except FileNotFoundError:
	            # Saving failed before the file was created; nothing to clean up.
	            pass
	        except OSError as cleanup_error:
	            print(f"Could not remove {file_path}: {cleanup_error}")

	    return jsonify({
	        'success': True,
	        'text': extracted_text
	    })

	@app.route('/health', methods=['GET'])
	def health_check():
	    """Report that the service is up: always 200 with ``{'status': 'healthy'}``."""
	    payload = {'status': 'healthy'}
	    return jsonify(payload), 200

	# Start Flask's built-in server only when this file is run as a script.
	if __name__ == '__main__':
	    # Listen on all interfaces, port 5000, with the debugger turned off.
	    app.run(
	        host='0.0.0.0',
	        port=5000,
	        debug=False,
	    )