Spaces:
Running
Running
File size: 4,087 Bytes
81e13bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import os
import pandas as pd
import os
# Set the service-account key location for Google Cloud authentication.
# NOTE(review): this assignment runs at import time and overwrites any value
# already present in the environment. The path is relative, so it is resolved
# against the current working directory -- confirm the key file is deployed
# there. Presumably consumed by the google.cloud.vision client created in
# detect_text(); verify.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./titanium-scope-436311-t3-966373f5aa2f.json"
def run_tesseract_on_image(image_path, output_dir="/content"):  # -> tsv output path
    """Run the Tesseract CLI on one image and return the TSV file it writes.

    Args:
        image_path: Path of the image to OCR.
        output_dir: Directory that receives the TSV file. Defaults to
            "/content", the location that used to be hard-coded.

    Returns:
        The path "<output_dir>/<image_name>.tsv", where ``image_name`` is the
        image's base name up to (not including) its first ".".

    Raises:
        ValueError: If tesseract cannot be started or exits with a non-zero
            status.
    """
    # Local import so the block does not depend on a file-level import.
    import subprocess

    print("image_path", image_path)

    # partition() keeps the whole name when there is no "." at all, whereas
    # slicing with find() (-1 in that case) silently dropped the last char.
    image_name = os.path.basename(image_path).partition('.')[0]
    output_base = os.path.join(output_dir, image_name)

    error_message = 'Tesseract OCR Error please verify image format PNG,JPG,JPEG'

    # An argument list (no shell) means quotes, spaces or shell
    # metacharacters in the path cannot break or inject into the command.
    command = ["tesseract", image_path, output_base, "-l", "eng", "tsv"]
    try:
        return_code = subprocess.run(command, check=False).returncode
    except OSError as err:
        # e.g. the tesseract binary is not installed; os.system reported this
        # as a non-zero status, so keep surfacing it as ValueError.
        raise ValueError(error_message) from err

    if return_code:
        raise ValueError(error_message)
    return f"{output_base}.tsv"
def clean_tesseract_output(tsv_output_path):
    """Parse a Tesseract TSV file into a list of words with bounding boxes.

    Args:
        tsv_output_path: Path of a tab-separated file that has at least the
            columns ``left``, ``top``, ``width``, ``height`` and ``text``.

    Returns:
        A list of dicts in file order, one per row with non-blank text:
        ``{"word_text": str, "word_box": [left, top, left + width,
        top + height]}`` with integer box coordinates.
    """
    # Local import so the block does not depend on a file-level import.
    import csv

    print("tsv_output_path", tsv_output_path)

    ocr_df = pd.read_csv(
        tsv_output_path,
        sep='\t',
        # OCR text may contain a lone '"'; with default quoting that opens a
        # quoted field that swallows the following rows.
        quoting=csv.QUOTE_NONE,
        # Keep numeric-looking words ("123") as text so string methods work.
        dtype={'text': str},
        # Do not turn real words such as "NA" or "null" into missing values.
        keep_default_na=False,
    )

    # With NA parsing disabled, blank or malformed geometry cells stay as
    # strings; coerce them to NaN so such rows can be filtered out.
    box_columns = ['left', 'top', 'width', 'height']
    geometry = pd.DataFrame(
        {column: pd.to_numeric(ocr_df[column], errors='coerce')
         for column in box_columns}
    )
    text = ocr_df['text'].fillna('').astype(str)

    # Keep only rows that carry both a full box and non-blank text.
    keep = geometry.notna().all(axis=1) & (text.str.strip() != '')
    geometry = geometry[keep].astype(int)
    text = text[keep]

    return [
        {
            'word_text': word,
            'word_box': [left, top, left + width, top + height],
        }
        for word, (left, top, width, height) in zip(
            text.tolist(), geometry.to_numpy().tolist()
        )
    ]
def detect_text(path):
    """Detect words in an image file with Google Cloud Vision text detection.

    Args:
        path: Path of the image file whose bytes are sent to the API.

    Returns:
        A list of dicts, one per text annotation after the first, each with
        ``"word_text"`` (the annotation's description) and ``"word_box"``
        (``[x_min, y_min, x_max, y_max]`` over the annotation's bounding
        polygon vertices).

    Raises:
        Exception: If the API response carries an error message.
    """
    print("this is path:", path)
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()
    with open(path, "rb") as image_file:
        content = image_file.read()
    image = vision.Image(content=content)
    response = client.text_detection(image=image)

    # Fail fast: check the API error before touching the annotations.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    texts = response.text_annotations
    print("Texts:")
    list_of_dict = []
    # The first annotation is skipped; in the Vision API response it is the
    # aggregate of all detected text rather than a single word.
    for text in texts[1:]:
        print(f'\n"{text.description}"')
        vertices_list = [[int(vertex.x), int(vertex.y)] for vertex in text.bounding_poly.vertices]
        print("vertices_list", vertices_list)
        if not vertices_list:
            # No geometry to build a box from; skipping keeps words and boxes
            # aligned because both come from the same dict.
            continue

        # Axis-aligned box enclosing the polygon. Taking min/max per axis
        # always yields x_min <= x_max and y_min <= y_max, including for
        # rotated text where picking corners by smallest/largest x + y can
        # produce an inverted or non-enclosing box.
        xs = [x for x, _ in vertices_list]
        ys = [y for _, y in vertices_list]
        list_of_dict.append(
            {
                "word_text": text.description,
                "word_box": [min(xs), min(ys), max(xs), max(ys)],
            }
        )
    return list_of_dict
def prepare_batch_for_inference(image_paths):
    """Run text detection on each image and bundle the results into a batch.

    Args:
        image_paths: Iterable of image paths; each one is passed to
            ``detect_text``.

    Returns:
        A dict with three entries:
        ``"image_path"`` - the ``image_paths`` argument itself,
        ``"bboxes"`` - per image, the list of ``word_box`` values,
        ``"words"`` - per image, the list of ``word_text`` values.
    """
    # One list of word dicts per image, in input order.
    clean_outputs = [detect_text(path) for path in image_paths]
    print("clean_outputs", clean_outputs)

    # Split each image's word dicts into parallel text and box lists.
    word_lists = []
    boxes_lists = []
    for detected_words in clean_outputs:
        word_lists.append([entry['word_text'] for entry in detected_words])
        boxes_lists.append([entry['word_box'] for entry in detected_words])

    return {
        "image_path": image_paths,
        "bboxes": boxes_lists,
        "words": word_lists,
    }
|