File size: 4,087 Bytes
81e13bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import pandas as pd

import os
# Module-level side effect: point GOOGLE_APPLICATION_CREDENTIALS at a
# service-account key file. The path is relative, so it is resolved against
# the process's current working directory, not against this file's location.
# NOTE(review): presumably this is what lets vision.ImageAnnotatorClient() in
# detect_text() authenticate without explicit credentials -- confirm. The
# hard-coded key file name also overrides any value already set in the
# environment.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./titanium-scope-436311-t3-966373f5aa2f.json"




def run_tesseract_on_image(image_path, output_dir="/content"):  # -> tsv output path
    """Run the Tesseract CLI on one image and return the path of its TSV output.

    Args:
        image_path: Path of the image to OCR.
        output_dir: Directory the ``.tsv`` file is written to. Defaults to
            ``/content``, the location that was previously hard-coded.

    Returns:
        The path ``<output_dir>/<image_name>.tsv`` where ``image_name`` is the
        image's base name up to (not including) its first dot.

    Raises:
        ValueError: If Tesseract exits with a non-zero status or cannot be
            started at all.
    """
    # Local import so the block does not depend on a new file-level import.
    import subprocess

    print("image_path",image_path)
    file_name = os.path.basename(image_path)
    # Keep the part before the first dot. str.find() returned -1 for names
    # without a dot, which silently chopped the last character; fall back to
    # the whole name when there is nothing before the first dot.
    image_name = file_name.partition('.')[0] or file_name
    output_base = os.path.join(output_dir, image_name)

    # Pass an argument list (no shell) so that quotes, spaces or shell
    # metacharacters in the path cannot break or alter the command.
    try:
        completed = subprocess.run(
            ["tesseract", image_path, output_base, "-l", "eng", "tsv"],
            check=False,
        )
    except OSError as err:
        # The executable is missing or not runnable; keep the exception type
        # callers already handle.
        raise ValueError(
            'Tesseract OCR Error: the tesseract executable could not be run'
        ) from err

    if completed.returncode != 0:
        raise ValueError('Tesseract OCR Error please verify image format PNG,JPG,JPEG')
    return f"{output_base}.tsv"


def clean_tesseract_output(tsv_output_path):
    """Parse a Tesseract TSV file into a list of words with bounding boxes.

    Rows whose ``text`` field is empty or whitespace-only are discarded.

    Args:
        tsv_output_path: Path of a tab-separated file with at least the
            columns ``text``, ``left``, ``top``, ``width`` and ``height``.

    Returns:
        A list of dicts, one per remaining row, in file order:
        ``{'word_text': str, 'word_box': [left, top, left + width, top + height]}``
        with the box coordinates as plain ``int`` values.
    """
    # Local import so the block does not depend on a new file-level import.
    import csv

    print("tsv_output_path",tsv_output_path)
    ocr_df = pd.read_csv(
        tsv_output_path,
        sep='\t',
        # A recognised word may contain a double quote; treat it as ordinary
        # text instead of a CSV quoting character.
        quoting=csv.QUOTE_NONE,
        # Without this, words such as "NA", "null" or "nan" are parsed as
        # missing values and then dropped as if they were empty.
        keep_default_na=False,
        # Keep purely numeric words as strings so the .str filter below works.
        dtype={'text': str},
    )
    # Rows that are still incomplete (e.g. truncated lines) carry no usable word.
    ocr_df = ocr_df.dropna()
    ocr_df = ocr_df[ocr_df['text'].str.strip() != '']

    return [
        {
            'word_text': text,
            'word_box': [
                int(left),
                int(top),
                int(left) + int(width),
                int(top) + int(height),
            ],
        }
        for text, left, top, width, height in zip(
            ocr_df['text'],
            ocr_df['left'],
            ocr_df['top'],
            ocr_df['width'],
            ocr_df['height'],
        )
    ]




def _axis_aligned_box(vertices):
    """Return [x_min, y_min, x_max, y_max] enclosing the given [x, y] vertices."""
    if not vertices:
        # No geometry reported: keep the word but give it a degenerate box
        # instead of failing on an empty list.
        return [0, 0, 0, 0]
    xs = [vertex[0] for vertex in vertices]
    ys = [vertex[1] for vertex in vertices]
    return [min(xs), min(ys), max(xs), max(ys)]


def detect_text(path):
    """Detect text in an image file with the Google Cloud Vision API.

    Args:
        path: Path of the image file to send to the API.

    Returns:
        A list of dicts, one per text annotation after the first one
        (the first annotation is skipped), each of the form
        ``{'word_text': <description>, 'word_box': [x_min, y_min, x_max, y_max]}``.

    Raises:
        Exception: If the API response carries an error message.
    """
    print("this is path:",path)

    from google.cloud import vision
    client = vision.ImageAnnotatorClient()
    with open(path, "rb") as image_file:
        content = image_file.read()
    image = vision.Image(content=content)
    response = client.text_detection(image=image)

    # Check for an API error before consuming the annotations so a failed
    # request is reported instead of being processed as if it had succeeded.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    texts = response.text_annotations
    print("Texts:")
    list_of_dict = []
    for text in texts[1:]:
        print(f'\n"{text.description}"')

        vertices_list = [[int(vertex.x),int(vertex.y)] for vertex in text.bounding_poly.vertices]
        print("vertices_list",vertices_list)

        # Use the min/max of the coordinates rather than picking two corners
        # by their x+y sum: for rotated or skewed quadrilaterals the corner
        # heuristic can return x0 > x1 or a box that does not enclose the word.
        list_of_dict.append({
            "word_text": text.description,
            "word_box": _axis_aligned_box(vertices_list),
        })

    return list_of_dict

  


def prepare_batch_for_inference(image_paths):
    """Collect the detected words and boxes of several images into one batch.

    Each path is passed to ``detect_text``; the resulting per-image word
    entries are split into parallel lists of texts and boxes.

    Args:
        image_paths: Iterable of image paths, processed in order.

    Returns:
        A dict with three keys:
        ``"image_path"`` (the ``image_paths`` argument itself),
        ``"bboxes"`` (one list of ``word_box`` values per image) and
        ``"words"`` (one list of ``word_text`` values per image).
    """
    # One list of {'word_text', 'word_box'} entries per image.
    per_image_entries = [detect_text(path) for path in image_paths]
    print("clean_outputs",per_image_entries)

    words_per_image = []
    boxes_per_image = []
    for entries in per_image_entries:
        words_per_image.append([entry['word_text'] for entry in entries])
        boxes_per_image.append([entry['word_box'] for entry in entries])

    return {
        "image_path": image_paths,
        "bboxes": boxes_per_image,
        "words": words_per_image,
    }