File size: 1,345 Bytes
b692870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import easyocr
import cv2
import os


# Readers already built, keyed by language code. Constructing an easyocr.Reader
# and loading detector weights is expensive, so it is done once per language
# instead of once per image.
_READER_CACHE = {}


def _get_reader(language):
  """Return the cached EasyOCR reader for `language`, creating it on first use."""
  reader = _READER_CACHE.get(language)
  if reader is None:
    reader = easyocr.Reader([language])
    # Replace the default detector with the custom weights file. The path is
    # relative to the current working directory; os.path.join avoids the
    # invalid "\B" escape sequence and the Windows-only separator.
    reader.detector = reader.initDetector(os.path.join('best', 'BEST.pth'))
    _READER_CACHE[language] = reader
  return reader


def extract_text_from_image(image_path, language='en'):
  """Extract text from an image using EasyOCR with a custom detector.

  Args:
      image_path (str): Path to the image file.
      language (str, optional): Language code to recognize. Defaults to
          'en' (English).

  Returns:
      list: Recognized text strings (`readtext` is called with `detail=0`,
          so only the text is returned).

  Raises:
      ValueError: If the image at `image_path` cannot be loaded.
  """
  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread signals a missing or undecodable file by returning None
    # rather than raising; fail here with a message that names the file.
    raise ValueError(f"Could not read image: {image_path!r}")

  reader = _get_reader(language)
  return reader.readtext(image, detail=0)  # detail=0: recognized text only


if __name__ == '__main__':
  # Folder containing the images to OCR, relative to the current working
  # directory. Built with os.path.join so the path contains no invalid "\A"
  # escape sequence and works with any platform's path separator.
  folder_path = os.path.join(
      "inference_results",
      "Anil Maheshwari - Data analytics-McGraw-Hill Education (2017)",
  )

  # Accumulates the text of every processed image
  all_extracted_text = ""

  # os.listdir returns entries in arbitrary order; sort them so the
  # concatenated text comes out in a deterministic (lexicographic) order.
  for filename in sorted(os.listdir(folder_path)):
    # Only .jpg and .png files are processed (case-sensitive match)
    if filename.endswith((".jpg", ".png")):
      image_path = os.path.join(folder_path, filename)

      # Extract text for current image
      extracted_text = extract_text_from_image(image_path)

      # One recognized string per line, then a blank line between images
      all_extracted_text += "\n".join(extracted_text) + "\n\n"