# Snapshot metadata from the file viewer: revision b692870, file size 1,345 bytes.
import easyocr
import cv2
import os
# Readers are expensive to build (model weights are loaded on construction),
# so keep one per language and reuse it across calls.
_READER_CACHE = {}


def _get_reader(language):
    """Return the cached EasyOCR reader for ``language``, creating it on first use."""
    reader = _READER_CACHE.get(language)
    if reader is None:
        reader = easyocr.Reader([language])
        # Replace the default detector with the weights stored in best/BEST.pth.
        # os.path.join avoids the invalid '\B' escape sequence of the former
        # literal and resolves to the same path on Windows while also working
        # on POSIX systems.
        reader.detector = reader.initDetector(os.path.join('best', 'BEST.pth'))
        _READER_CACHE[language] = reader
    return reader


def extract_text_from_image(image_path: str, language: str = 'en') -> list:
    """
    Extracts text from an image using EasyOCR.

    Args:
        image_path (str): Path to the image file.
        language (str, optional): Language to be recognized. Defaults to 'en' (English).

    Returns:
        list: List of recognized text strings.

    Raises:
        ValueError: If the image at ``image_path`` cannot be read.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than deep inside the OCR call.
    if image is None:
        raise ValueError(f"Could not read image: {image_path!r}")

    reader = _get_reader(language)
    # detail=0 asks EasyOCR for the recognized texts only.
    return reader.readtext(image, detail=0)
if __name__ == '__main__':
    # Folder containing the images to run OCR on. Built with os.path.join so
    # the literal no longer relies on a backslash separator (the former
    # "...\A..." was an invalid escape sequence and Windows-only).
    folder_path = os.path.join(
        "inference_results",
        "Anil Maheshwari - Data analytics-McGraw-Hill Education (2017)",
    )

    # os.listdir returns entries in arbitrary order; sort the names so the
    # concatenated text comes out in the same order on every run.
    image_names = sorted(
        name for name in os.listdir(folder_path)
        if name.endswith((".jpg", ".png"))
    )

    # One chunk per image: its recognized strings joined by newlines, followed
    # by a blank line separating it from the next image's text.
    all_extracted_text = "".join(
        "\n".join(extract_text_from_image(os.path.join(folder_path, name))) + "\n\n"
        for name in image_names
    )
    # NOTE(review): all_extracted_text is not printed or saved anywhere in the
    # visible code - confirm whether an output step is missing.
|