patharanor committed on
Commit
42a09ad
·
verified ·
1 Parent(s): f42d8b2

feat: host list file detection

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.traineddata filter=lfs diff=lfs merge=lfs -text
apis/layoutlm.py CHANGED
@@ -21,6 +21,23 @@ class LayoutLM:
21
 
22
  self.pipeline = self.tf_pipeline(self.pipeline_category, model=model)
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def answer_the_question(self, img, question: str, is_debug=False):
25
  score = 0
26
  answer = '-'
 
21
 
22
  self.pipeline = self.tf_pipeline(self.pipeline_category, model=model)
23
 
24
def answer_the_question_without_filter(self, img, question: str, is_debug=False, **kwargs):
    """Run the question-answering pipeline and return its raw, unfiltered output.

    Args:
        img: Document image handed to the pipeline as-is.
        question: Question to ask about the document.
        is_debug: When true, print the raw pipeline output.
        **kwargs: Optional tuning values. ``top_k`` (default 1) and
            ``max_answer_len`` (default 15) are forwarded to the pipeline;
            a missing or ``None`` value falls back to its default.

    Returns:
        Whatever the pipeline returns, or ``None`` when no pipeline is loaded.
    """
    answers = None

    # Single lookup per option; None means "not provided".
    top_k = kwargs.get('top_k')
    max_answer_len = kwargs.get('max_answer_len')

    # UI widgets may deliver numbers as floats; both options are counts,
    # so normalise them to int before they reach the pipeline.
    top_k = 1 if top_k is None else int(top_k)
    max_answer_len = 15 if max_answer_len is None else int(max_answer_len)

    if self.pipeline is not None:
        answers = self.pipeline(
            img,
            question,
            top_k=top_k,
            max_answer_len=max_answer_len,
        )

        if is_debug:
            print('--------------------')
            print(answers)

    return answers
40
+
41
  def answer_the_question(self, img, question: str, is_debug=False):
42
  score = 0
43
  answer = '-'
apis/qc3/host_list.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ from PIL import Image
4
+ import pytesseract
5
+
6
class HostList:
    """Extract host / MID / TID records from a host-list image via Tesseract OCR.

    The image is OCR'd with the ``eng_slashed_zeros`` model, then the
    recognised text is scanned for ``label: value`` lines, which are grouped
    three at a time into ``{"host", "mid", "tid"}`` records.
    """

    def __init__(self, is_debug=False) -> None:
        """Store the debug flag and the Tesseract configuration string.

        Args:
            is_debug: When true, intermediate results are printed to stdout.
        """
        self.is_debug = is_debug

        # Host List Style (hlstyle) configuration for pytesseract
        # - psm means page segmentation mode (Ref. https://pyimagesearch.com/2021/11/15/tesseract-page-segmentation-modes-psms-explained-how-to-improve-your-ocr-accuracy/)
        # - custom traineddata fixes the slashed-zero issue: https://github.com/ReceiptManager/receipt-parser-server/tree/master/tessdata
        self.hlstyle_config = r'--psm 6 --tessdata-dir ./tessdata -l eng_slashed_zeros'

    @staticmethod
    def _parse_osd(osd: str):
        """Pull the ``Rotate`` angle and ``Script`` name out of Tesseract OSD text.

        Returns:
            ``(angle, script)`` as strings; either item is ``None`` when the
            corresponding field is absent from ``osd``.
        """
        rotate_match = re.search(r'(?<=Rotate: )\d+', osd)
        # The script is reported by name (e.g. "Latin"), so match word
        # characters; the patterns have no capture group, hence group(0).
        script_match = re.search(r'(?<=Script: )\w+', osd)

        angle = rotate_match.group(0) if rotate_match else None
        script = script_match.group(0) if script_match else None
        return (angle, script)

    def get_orientation(self, image: "Image.Image"):
        """Detect the rotation angle and script of ``image``.

        Args:
            image: Image passed to ``pytesseract.image_to_osd``.

        Returns:
            ``(angle, script)`` as parsed from the OSD report (strings or ``None``).
        """
        osd = pytesseract.image_to_osd(image)
        angle, script = self._parse_osd(osd)

        if self.is_debug:
            print("---------------------------------")
            print(f"angle : {angle}")
            print(f"script : {script}")

        return (angle, script)

    def post_processes(self, result: str):
        """Group ``label: value`` lines of OCR text into host/mid/tid records.

        Every line containing a colon contributes the text between its first
        and second colon. Values containing at least one ASCII letter or digit
        fill, in order, the ``host``, ``mid`` and ``tid`` fields of the current
        record; once all three are set the record is stored and a new one
        starts. A trailing, incomplete record is discarded.

        Args:
            result: Raw OCR text.

        Returns:
            The original text, a separator line, and the extracted records as
            indented JSON (``{"data": [...]}``).
        """
        data = {'data': []}
        record = {}

        if self.is_debug:
            print("---------------------------------")
            print("post-processes:\n")

        for line in result.splitlines():
            if ':' not in line:
                continue

            infos = line.split(':')[1]
            if self.is_debug:
                print(infos)

            # Only values with at least one letter or digit count as a field.
            if re.search(r'[a-zA-Z0-9]+', infos):
                if 'host' not in record:
                    # Host names keep word characters only.
                    record['host'] = re.sub(r'\W', '', infos)
                elif 'mid' not in record:
                    # The longest space-separated token is taken as the id.
                    record['mid'] = max(infos.split(' '), key=len)
                else:
                    record['tid'] = max(infos.split(' '), key=len)

            if len(record) == 3:
                data['data'].append(record.copy())
                if self.is_debug:
                    print(json.dumps(record))
                record.clear()

        if self.is_debug:
            print(json.dumps(data))

        return f'{result}\n-------------------\n{json.dumps(data, indent=2)}'

    def process_image(self, image: "Image.Image"):
        """OCR ``image`` and return the text followed by the extracted records.

        Args:
            image: Image to recognise; ``None`` (nothing supplied) yields ``''``.

        Returns:
            The string produced by ``post_processes`` for the OCR text.
        """
        if image is None:
            return ''

        text = pytesseract.image_to_string(image, config=self.hlstyle_config)
        return self.post_processes(text)
app.py CHANGED
@@ -1,9 +1,12 @@
1
  from apis.layoutlm import LayoutLM
 
 
2
  import pandas as pd
3
  import gradio as gr
4
  import os
5
 
6
  layoutlm = None
 
7
 
8
  def auth(username, password):
9
  u = os.environ.get('USERNAME')
@@ -13,8 +16,31 @@ def auth(username, password):
13
  def inference(img) -> pd.DataFrame:
14
  return layoutlm.inference(img)
15
 
16
- def ask(img, question) -> str:
17
- return layoutlm.answer_the_question(img, question, is_debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  if __name__ == '__main__':
20
 
@@ -24,26 +50,54 @@ if __name__ == '__main__':
24
 
25
  with gr.Blocks() as demo:
26
 
27
- with gr.Row():
28
- inp_img = gr.Image(type='pil')
29
-
30
- with gr.Column():
31
- out = gr.Dataframe(
32
- headers=['Data', 'Value'],
33
- datatype=['str', 'str'],
34
- row_count=8,
35
- col_count=(2, 'fixed'),
36
- interactive=False
37
- )
38
-
39
- txt_custom_question = gr.Textbox(label='Your question')
40
- btn_ask = gr.Button('Ask me')
41
- txt_out_answer = gr.Textbox(label='Answer', interactive=False)
42
-
43
- # event
44
- inp_img.change(inference, inp_img, out)
45
- btn_ask.click(ask, [inp_img, txt_custom_question], txt_out_answer)
46
-
47
- demo.launch(auth=auth)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  except Exception as e:
49
  print(str(e))
 
1
  from apis.layoutlm import LayoutLM
2
+ from apis.qc3.host_list import HostList
3
+ from PIL import Image
4
  import pandas as pd
5
  import gradio as gr
6
  import os
7
 
8
  layoutlm = None
9
+ hl = HostList(is_debug=True)
10
 
11
  def auth(username, password):
12
  u = os.environ.get('USERNAME')
 
16
  def inference(img) -> pd.DataFrame:
17
  return layoutlm.inference(img)
18
 
19
def filter_green_out(img: "Image.Image"):
    """Binarise an image: near-black pixels become black, all others white.

    A pixel counts as near-black when its red, green and blue values are all
    below 70. Despite the name, no channel is treated specially.

    Args:
        img: Image to process. RGB images are modified in place; any other
            mode is first converted, so the result is a new RGB image.

    Returns:
        The binarised RGB image. As a side effect it is also written to
        ``./temp.jpg``.
    """
    # The 3-tuple pixel handling and the JPEG save below both need plain RGB
    # (e.g. RGBA PNG uploads would otherwise fail).
    if img.mode != 'RGB':
        img = img.convert('RGB')

    pixels = img.load()
    width, height = img.size  # PIL reports size as (width, height)
    for x in range(width):
        for y in range(height):
            r, g, b = pixels[x, y]
            is_dark = r < 70 and g < 70 and b < 70
            pixels[x, y] = (0, 0, 0) if is_dark else (255, 255, 255)

    img.save('./temp.jpg')
    return img
33
+
34
def ask(img: "Image.Image", question, top_k, max_answer_len, chk_is_remove_green) -> str:
    """Answer a question about an uploaded document image.

    Args:
        img: Uploaded image, or ``None`` when nothing has been uploaded.
        question: Question text.
        top_k: Forwarded to the LayoutLM helper as ``top_k``.
        max_answer_len: Forwarded to the LayoutLM helper as ``max_answer_len``.
        chk_is_remove_green: When truthy, a binarised copy of the image
            (see ``filter_green_out``) is used instead of the original.

    Returns:
        The value returned by ``layoutlm.answer_the_question_without_filter``.

    Raises:
        gr.Error: If no image has been provided.
    """
    # Without this guard a missing upload fails with an opaque
    # AttributeError on ``img.copy()``.
    if img is None:
        raise gr.Error('Please upload an image before asking a question.')

    if chk_is_remove_green:
        # Work on a copy so the image held by the UI component is untouched.
        img = filter_green_out(img.copy())

    return layoutlm.answer_the_question_without_filter(
        img,
        question,
        top_k=top_k,
        max_answer_len=max_answer_len,
        is_debug=True)
44
 
45
  if __name__ == '__main__':
46
 
 
50
 
51
  with gr.Blocks() as demo:
52
 
53
+ with gr.Tab('List'):
54
+ with gr.Row():
55
+ with gr.Column():
56
+ list_inp_img = gr.Image(type="pil")
57
# One inner list per example row; each row holds exactly one value per
# input component (a single image here), so every file gets its own row.
gr.Examples(
    [
        ['./examples/host-list1.JPG'],
        ['./examples/host-list2.JPG'],
        ['./examples/host-list3.JPG'],
    ],
    list_inp_img
)
61
+
62
+ with gr.Column():
63
+ list_out_txt = gr.Textbox(label='Answer', interactive=False)
64
+
65
+ list_btn_ask = gr.Button('Ask me')
66
+ list_btn_ask.click(hl.process_image, [
67
+ list_inp_img
68
+ ], list_out_txt)
69
+
70
+ with gr.Tab('Layout'):
71
+ with gr.Row():
72
+ inp_img = gr.Image(type='pil')
73
+
74
+ with gr.Column():
75
+ out = gr.Dataframe(
76
+ headers=['Data', 'Value'],
77
+ datatype=['str', 'str'],
78
+ row_count=8,
79
+ col_count=(2, 'fixed'),
80
+ interactive=False
81
+ )
82
+
83
+ txt_custom_question = gr.Textbox(label='Your question')
84
+ sld_max_answer = gr.Slider(1, 10, value=1, step=1, label="Max answer", info="Top-K between 1 and 10")
85
+ sld_max_answer_len = gr.Slider(1, 200, value=15, step=1, label="Max answer length", info="Length between 15 and 200")
86
+ chk_is_remove_green = gr.Checkbox(label="Remove green", info="Do you need clean context?")
87
+ btn_ask = gr.Button('Ask me')
88
+ txt_out_answer = gr.Textbox(label='Answer', interactive=False)
89
+
90
+ # event
91
+ inp_img.change(inference, inp_img, out)
92
+ btn_ask.click(ask, [
93
+ inp_img,
94
+ txt_custom_question,
95
+ sld_max_answer,
96
+ sld_max_answer_len,
97
+ chk_is_remove_green
98
+ ], txt_out_answer)
99
+
100
+ #demo.launch(auth=auth)
101
+ demo.launch()
102
  except Exception as e:
103
  print(str(e))
examples/ex-invoice-1.png ADDED
examples/ex-invoice-2.png ADDED
examples/host-list1.JPG ADDED
examples/host-list2.JPG ADDED
examples/host-list3.JPG ADDED
requirements.txt CHANGED
@@ -4,4 +4,4 @@ torch==2.2.0
4
  pytesseract==0.3.10
5
  Pillow==10.0
6
  gradio==4.19.0
7
- pandas==2.2.0
 
4
  pytesseract==0.3.10
5
  Pillow==10.0
6
  gradio==4.19.0
7
+ pandas==2.2.0
tessdata/eng_slashed_zeros.traineddata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d3edb3b6bddf3d15d80b36ad9c1203d1289ead1e6b9d4bbb006357a267a2b3a
3
+ size 15858079