Spaces:

patharanor
/

invex

Sleeping

App Files Files Community

patharanor committed on Mar 6, 2024

Commit

42a09ad

verified ·

1 Parent(s): f42d8b2

feat: host list file detection

Browse files

Files changed (11) hide show

.gitattributes +1 -0
apis/layoutlm.py +17 -0
apis/qc3/host_list.py +83 -0
app.py +77 -23
examples/ex-invoice-1.png +0 -0
examples/ex-invoice-2.png +0 -0
examples/host-list1.JPG +0 -0
examples/host-list2.JPG +0 -0
examples/host-list3.JPG +0 -0
requirements.txt +1 -1
tessdata/eng_slashed_zeros.traineddata +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.traineddata filter=lfs diff=lfs merge=lfs -text

apis/layoutlm.py CHANGED Viewed

@@ -21,6 +21,23 @@ class LayoutLM:
         self.pipeline = self.tf_pipeline(self.pipeline_category, model=model)
     def answer_the_question(self, img, question: str, is_debug=False):
         score = 0
         answer = '-'

         self.pipeline = self.tf_pipeline(self.pipeline_category, model=model)
+    def answer_the_question_without_filter(self, img, question: str, is_debug=False, **kwargs):
+        answers = None
+        top_k = kwargs['top_k'] if kwargs.get('top_k') is not None else 1
+        max_answer_len = kwargs['max_answer_len'] if kwargs.get('max_answer_len') is not None else 15
+        if self.pipeline is not None:
+            answers = self.pipeline(img, question,
+                top_k=top_k,
+                max_answer_len=max_answer_len)
+        if is_debug:
+            print('--------------------')
+            print(answers)
+        return answers
     def answer_the_question(self, img, question: str, is_debug=False):
         score = 0
         answer = '-'

apis/qc3/host_list.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import re
+import json
+from PIL import Image
+import pytesseract
+class HostList:
+    def __init__(self, is_debug=False) -> None:
+        self.is_debug = is_debug
+        # Host List Style (hlstyle) configuration for pytesseract
+        # - psm means page segmentation (Ref. https://pyimagesearch.com/2021/11/15/tesseract-page-segmentation-modes-psms-explained-how-to-improve-your-ocr-accuracy/)
+        # - fixed slashed zero issue with custom traineddata: https://github.com/ReceiptManager/receipt-parser-server/tree/master/tessdata
+        self.hlstyle_config = r'--psm 6 --tessdata-dir ./tessdata -l eng_slashed_zeros'
+    def get_orientation(self, image: Image):
+        # detect orientation
+        osd = pytesseract.image_to_osd(image)
+        isrotate = re.search('(?<=Rotate: )\d+', osd)
+        isscript = re.search('(?<=Script: )\d+', osd)
+        angle = re.search('(?<=Rotate: )\d+', osd).group(0) if isrotate else None
+        script = re.search('(?<=Script: )\d+', osd).group(1) if isscript else None
+        if self.is_debug:
+            print("---------------------------------")
+            print(f"angle : {angle}")
+            print(f"script : {script}")
+        return (angle, script)
+    def post_processes(self, result: str):
+        data = dict()
+        obj = dict()
+        data['data'] = []
+        is_host = False
+        is_mid = False
+        is_tid = False
+        if self.is_debug:
+            print("---------------------------------")
+            print("post-processes:\n")
+        lines = result.splitlines()
+        for line in lines:
+            if re.search(r'(\:)', line):
+                infos = line.split(':')[1]
+                # # Clear end line character
+                # if len(infos) > 1:
+                #     infos.pop()
+                # # Merge all
+                # infos = ''.join(infos)
+                print(infos)
+                # Is alphabet or numeric ?
+                if re.search(r'[a-zA-Z0-9]+', infos):
+                    if not is_host and not is_mid and not is_tid:
+                        is_host = True
+                        obj['host'] = re.sub('\W', '', infos)
+                    elif is_host and not is_mid and not is_tid:
+                        is_mid = True
+                        obj['mid'] = max(infos.split(' '), key=len)
+                    elif is_host and is_mid and not is_tid:
+                        is_tid = True
+                        obj['tid'] = max(infos.split(' '), key=len)
+                    if is_host and is_mid and is_tid:
+                        data['data'].append(obj.copy())
+                        print(json.dumps(obj))
+                        is_host = False
+                        is_mid = False
+                        is_tid = False
+                        obj.clear()
+        print(json.dumps(data))
+        return f'{result}\n-------------------\n{json.dumps(data, indent=2)}'
+    def process_image(self, image: Image):
+        string = pytesseract.image_to_string(image, config=self.hlstyle_config)
+        string = self.post_processes(string)
+        return f'{string}'

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
 from apis.layoutlm import LayoutLM
 import pandas as pd
 import gradio as gr
 import os
 layoutlm = None
 def auth(username, password):
     u = os.environ.get('USERNAME')
@@ -13,8 +16,31 @@ def auth(username, password):
 def inference(img) -> pd.DataFrame:
     return layoutlm.inference(img)
-def ask(img, question) -> str:
-    return layoutlm.answer_the_question(img, question, is_debug=True)
 if __name__ == '__main__':
@@ -24,26 +50,54 @@ if __name__ == '__main__':
         with gr.Blocks() as demo:
-            with gr.Row():
-                inp_img = gr.Image(type='pil')
-                with gr.Column():
-                    out = gr.Dataframe(
-                        headers=['Data', 'Value'],
-                        datatype=['str', 'str'],
-                        row_count=8,
-                        col_count=(2, 'fixed'),
-                        interactive=False
-                    )
-                    txt_custom_question = gr.Textbox(label='Your question')
-                    btn_ask = gr.Button('Ask me')
-                    txt_out_answer = gr.Textbox(label='Answer', interactive=False)
-                    # event
-                    inp_img.change(inference, inp_img, out)
-                    btn_ask.click(ask, [inp_img, txt_custom_question], txt_out_answer)
-        demo.launch(auth=auth)
     except Exception as e:
         print(str(e))

 from apis.layoutlm import LayoutLM
+from apis.qc3.host_list import HostList
+from PIL import Image
 import pandas as pd
 import gradio as gr
 import os
 layoutlm = None
+hl = HostList(is_debug=True)
 def auth(username, password):
     u = os.environ.get('USERNAME')
 def inference(img) -> pd.DataFrame:
     return layoutlm.inference(img)
+def filter_green_out(img: Image):
+    image_data = img.load()
+    height,width = img.size
+    for loop1 in range(height):
+        for loop2 in range(width):
+            (r,g,b) = image_data[loop1,loop2]
+            if g < 70 and r < 70 and b < 70:
+                (r,g,b) = (0,0,0)
+            else:
+                (r,g,b) = (255,255,255)
+            image_data[loop1,loop2] = r,g,b
+    img.save('./temp.jpg')
+    return img
+def ask(img: Image, question, top_k, max_answer_len, chk_is_remove_green) -> str:
+    if chk_is_remove_green:
+        img = filter_green_out(img.copy())
+    return layoutlm.answer_the_question_without_filter(
+        img,
+        question,
+        top_k=top_k,
+        max_answer_len=max_answer_len,
+        is_debug=True)
 if __name__ == '__main__':
         with gr.Blocks() as demo:
+            with gr.Tab('List'):
+                with gr.Row():
+                    with gr.Column():
+                        list_inp_img = gr.Image(type="pil")
+                        gr.Examples(
+                            [['./examples/host-list1.JPG'], ['./examples/host-list2.JPG', './examples/host-list3.JPG']],
+                            list_inp_img
+                        )
+                    with gr.Column():
+                        list_out_txt = gr.Textbox(label='Answer', interactive=False)
+                        list_btn_ask = gr.Button('Ask me')
+                        list_btn_ask.click(hl.process_image, [
+                            list_inp_img
+                        ], list_out_txt)
+            with gr.Tab('Layout'):
+                with gr.Row():
+                    inp_img = gr.Image(type='pil')
+                    with gr.Column():
+                        out = gr.Dataframe(
+                            headers=['Data', 'Value'],
+                            datatype=['str', 'str'],
+                            row_count=8,
+                            col_count=(2, 'fixed'),
+                            interactive=False
+                        )
+                        txt_custom_question = gr.Textbox(label='Your question')
+                        sld_max_answer = gr.Slider(1, 10, value=1, step=1, label="Max answer", info="Top-K between 1 and 10")
+                        sld_max_answer_len = gr.Slider(1, 200, value=15, step=1, label="Max answer length", info="Length between 15 and 200")
+                        chk_is_remove_green = gr.Checkbox(label="Remove green", info="Do you need clean context?")
+                        btn_ask = gr.Button('Ask me')
+                        txt_out_answer = gr.Textbox(label='Answer', interactive=False)
+                        # event
+                        inp_img.change(inference, inp_img, out)
+                        btn_ask.click(ask, [
+                            inp_img,
+                            txt_custom_question,
+                            sld_max_answer,
+                            sld_max_answer_len,
+                            chk_is_remove_green
+                        ], txt_out_answer)
+        #demo.launch(auth=auth)
+        demo.launch()
     except Exception as e:
         print(str(e))

examples/ex-invoice-1.png ADDED Viewed

examples/ex-invoice-2.png ADDED Viewed

examples/host-list1.JPG ADDED Viewed

examples/host-list2.JPG ADDED Viewed

examples/host-list3.JPG ADDED Viewed

requirements.txt CHANGED Viewed

@@ -4,4 +4,4 @@ torch==2.2.0
 pytesseract==0.3.10
 Pillow==10.0
 gradio==4.19.0
-pandas==2.2.0

 pytesseract==0.3.10
 Pillow==10.0
 gradio==4.19.0
+pandas==2.2.0

tessdata/eng_slashed_zeros.traineddata ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d3edb3b6bddf3d15d80b36ad9c1203d1289ead1e6b9d4bbb006357a267a2b3a
+size 15858079