import os | |
import cv2 | |
import numpy as np | |
from PIL import Image, ImageDraw, ImageFont | |
from tqdm import tqdm | |
import os | |
import easyocr | |
# Working directories, all relative to the current working directory.
models_dir = "./models"  # passed to EasyOCR as both model and user-network directory
images_dir = "./images"
output_dir = "./output"
dirs = [models_dir, images_dir, output_dir]

# Make sure every working directory exists before anything tries to use it.
# The loop previously tested and created `output_dir` on each iteration, so
# the models and images directories were never created. `exist_ok=True`
# replaces the separate exists() check and avoids the check-then-create race.
for d in dirs:
    os.makedirs(d, exist_ok=True)
class KZReader:
    """Thin wrapper around an ``easyocr.Reader`` configured for ``best_norm_ED``.

    The reader is created with the ``craft`` detection network and the
    ``best_norm_ED`` recognition network; ``models_dir`` is used both as the
    user network directory and as the model storage directory.
    """

    def __init__(self):
        """Create the underlying EasyOCR reader.

        Constructing the reader loads the model into memory, so this only
        needs to happen once: build one ``KZReader`` and reuse it.
        """
        self.reader = easyocr.Reader(
            ['en'],
            gpu=True,
            recog_network='best_norm_ED',
            detect_network="craft",
            user_network_directory=models_dir,
            model_storage_directory=models_dir,
        )

    def readtext(self, image, paragraph: bool):
        """Run OCR on ``image`` and return EasyOCR's result unchanged.

        Args:
            image: Input forwarded as-is to ``easyocr.Reader.readtext``.
            paragraph: Forwarded to EasyOCR's ``paragraph`` option.
                NOTE(review): per the EasyOCR docs this merges detections
                into paragraphs and changes the shape of each result entry
                -- confirm callers handle both shapes.

        Returns:
            Whatever ``self.reader.readtext`` returns.
        """
        # Honour the caller's choice: this used to hard-code paragraph=True,
        # silently ignoring the `paragraph` argument.
        return self.reader.readtext(image=image, paragraph=paragraph)
""" | |
Upload the EasyOCR model files (the weights and config files must share the
same base name) and the font file named Ubuntu-Regular.ttf, for example:
    best_norm_ED.pth
    best_norm_ED.yaml
    Ubuntu-Regular.ttf
to the models directory.

Upload the image files you want to test, for example:
    kz_book_simple.jpeg
    kz_blur.jpg
    kz_book_complex.jpg
to the images directory.
""" | |
''' | |
font_path = models_dir + "/Ubuntu-Regular.ttf" | |
reader = easyocr.Reader( | |
['en'], | |
gpu=True, | |
recog_network='best_norm_ED', | |
detect_network="craft", | |
user_network_directory=models_dir, | |
model_storage_directory=models_dir, | |
) # this needs to run only once to load the model into memory | |
image_extensions = (".jpg", ".jpeg", ".png") | |
''' | |
''' | |
for image_name in tqdm(os.listdir(images_dir)): | |
if not image_name.lower().endswith(image_extensions): | |
print(f'unsupported file {image_name}') | |
continue | |
image_path = f'{images_dir}/{image_name}' | |
print(image_path) | |
# Read image as numpy array | |
image = cv2.imread(image_path) | |
# Rotate the image by 270 degrees | |
# image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) | |
# Convert the image from BGR to RGB (because OpenCV loads images in BGR format) | |
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) | |
results = reader.readtext(image=image) | |
# Load custom font | |
font = ImageFont.truetype(font_path, 32) | |
# Display the results | |
for (bbox, text, prob) in results: | |
# Get the bounding box coordinates | |
(top_left, top_right, bottom_right, bottom_left) = bbox | |
top_left = (int(top_left[0]), int(top_left[1])) | |
bottom_right = (int(bottom_right[0]), int(bottom_right[1])) | |
# Draw the bounding box on the image | |
cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2) | |
# Convert the OpenCV image to a PIL image, draw the text, then convert back to an OpenCV image | |
image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) | |
draw = ImageDraw.Draw(image_pil) | |
draw.text((top_left[0], top_left[1] - 40), text, font=font, fill=(0, 0, 255)) | |
image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR) | |
# Save image | |
cv2.imwrite( f'{output_dir}/{image_name}', image) | |
# reader.readtext(image = image, paragraph=True) | |
''' |