linjieccc committed on
Commit
5be463c
1 Parent(s): dbe6b8b

first model version

Browse files
.gitattributes CHANGED
@@ -29,3 +29,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ *.png filter=lfs diff=lfs merge=lfs -text
33
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,494 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #-*- coding: UTF-8 -*-
2
+ # Copyright 2022 The Impira Team and the HuggingFace Team.
3
+ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import os
18
+ import json
19
+ import base64
20
+ from io import BytesIO
21
+ from PIL import Image
22
+ import traceback
23
+
24
+ import requests
25
+ import numpy as np
26
+ import gradio as gr
27
+ import pdf2image
28
+ import fitz
29
+ import cv2
30
+
31
+ # Module-level PyMuPDF Tools handle; PdfReader calls store_shrink() on it after rendering each page.
+ fitz_tools = fitz.Tools()
32
+
33
+
34
def pdf2img(stream, pagenos, dpi=300, thread_count=3, height=1600):
    """Rasterise a page range of an in-memory PDF through pdf2image.

    Args:
        stream: Raw bytes of the PDF document.
        pagenos: Sequence of zero-based page numbers. Only the first and the
            last entry are read; they are the inclusive bounds of the range.
        dpi: Forwarded to ``pdf2image.convert_from_bytes``.
        thread_count: Forwarded to ``pdf2image.convert_from_bytes``.
        height: Forwarded to ``convert_from_bytes`` as ``size``.

    Returns:
        A list with one numpy array per converted page. The last axis of each
        array is reversed relative to what pdf2image produced (presumably
        RGB -> BGR channel order).
    """
    # The caller's page numbers are shifted by one before being handed over.
    first_page = pagenos[0] + 1
    last_page = pagenos[-1] + 1
    rendered_pages = pdf2image.convert_from_bytes(
        stream,
        dpi=dpi,
        thread_count=thread_count,
        first_page=first_page,
        last_page=last_page,
        size=height,
    )
    return [np.array(page)[..., ::-1] for page in rendered_pages]
44
+
45
+
46
class PdfReader(object):
    """Render pages of an in-memory PDF as images.

    PyMuPDF (``fitz``) is tried first. When it cannot open the document or
    render a page, ``get_page_image`` falls back to ``pdf2img``.
    """

    def __init__(self,
                 stream: bytes,
                 image_height: int = 1600):
        """Keep the raw bytes and try to open them with PyMuPDF.

        Args:
            stream: Raw bytes of the PDF file.
            image_height: Target height used when rendering page images.
        """
        self.stream = stream
        self._image_height = image_height
        self._dpi = 200  # dpi handed to the pdf2img fallback
        # None when PyMuPDF could not open the stream (see load_file).
        self._inpdf = self.load_file(stream)

    @staticmethod
    def load_file(stream):
        """Open ``stream`` as a PDF document with PyMuPDF.

        Args:
            stream: Raw bytes of the PDF file.

        Returns:
            The opened ``fitz.Document``, or ``None`` when opening failed.
            The failure is printed rather than raised so that page rendering
            can still go through the pdf2img fallback.
        """
        try:
            return fitz.Document(stream=stream, filetype="pdf")
        except Exception as e:
            print(f"[PDF_READER]-[Failed to load the file]-[{repr(e)}]")
            # Previously the function fell through to return an unbound
            # local, which raised UnboundLocalError instead of degrading.
            return None

    @staticmethod
    def _convert_page_obj_to_image(page_obj, image_height: int = None):
        """Render one PyMuPDF page to an image array.

        Args:
            page_obj: The ``fitz`` page to render.
            image_height: Desired output height. The page is scaled uniformly
                by ``image_height / page height``; a falsy value renders the
                page unscaled.

        Returns:
            The array produced by ``cv2.imdecode`` from the rendered pixmap.
        """
        if image_height:
            page_height = page_obj.rect.y1 - page_obj.rect.y0
            ratio = image_height / page_height
        else:
            ratio = 1.0
        trans = fitz.Matrix(ratio, ratio)
        pixmap = page_obj.get_pixmap(matrix=trans, alpha=False)
        image = cv2.imdecode(np.frombuffer(pixmap.tobytes(), np.uint8), -1)
        # Ask PyMuPDF to shrink its store after every render.
        fitz_tools.store_shrink(100)
        return image

    def get_page_image(self,
                       pageno):
        """Return the image of one page, or ``None`` if it cannot be rendered.

        Args:
            pageno: Zero-based index of the page.

        Returns:
            The page image from PyMuPDF; if that fails, the first image
            returned by ``pdf2img`` for this page; ``None`` when both
            attempts fail. Failures are printed, not raised.
        """
        try:
            page_obj = self._inpdf[pageno]
            return self._convert_page_obj_to_image(page_obj, self._image_height)
        except Exception as e:
            print(f"[Failed to convert the PDF to images]-[{repr(e)}]")
        # Second attempt: rasterise the same page from the raw bytes.
        try:
            return pdf2img(stream=self.stream,
                           pagenos=[pageno],
                           height=self._image_height,
                           dpi=self._dpi)[0]
        except Exception as e:
            print(f"[Failed to convert the PDF to images]-[{repr(e)}]")
        return None
111
+
112
+
113
# Bundled sample documents, each paired with a demo prompt. The list is
# passed to gr.Examples as [file, prompt] rows.
examples = [
    ["invoice.jpg", "发票号码是多少?"],
    ["resume.png", "五百丁本次想要担任的是什么职位?"],
    ["custom_declaration_form.png", "在哪个口岸进口?"],
    ["medical_bill_1.png", "票据的具体名称是什么?"],
    ["budget_form.png", "What is the total actual and/or obligated expenses of ECG Center?"],
    ["website_design_guide.jpeg", "Which quality component has the icon of a pen in it?"],
    ["poster.png", "Which gift idea needs a printer?"],
    ["medical_bill_2.png", "患者さんは何でお金を払いますか。"],
    ["receipt.png", "เบอร์โทรร้านอะไรคะ"],
]

# Reverse lookup: example prompt -> file name of the sample document it
# belongs to. Built from `examples` so the two cannot drift apart.
prompt_files = {prompt_text: file_name for file_name, prompt_text in examples}
163
+
164
+
165
def load_document(path):
    """Load an image or PDF from a local path or URL as a list of page images.

    Args:
        path: Filesystem path or ``http://`` / ``https://`` URL.

    Returns:
        For a path ending in ``.pdf`` (any letter case): one entry per page,
        each being whatever ``PdfReader.get_page_image`` returns for it.
        Otherwise a single-element list with the PIL image converted to RGB.

    Raises:
        requests.RequestException: The download failed, timed out or
            returned an HTTP error status.
        OSError: The local file could not be read or the image not decoded.
    """
    if path.startswith(("http://", "https://")):
        # NOTE(review): the URL may come straight from the UI textbox and is
        # fetched server-side without any host restriction.
        # A timeout keeps an unresponsive host from blocking the worker.
        resp = requests.get(path, allow_redirects=True, timeout=30)
        resp.raise_for_status()
        data = resp.content
    else:
        # Read everything up front so the handle is closed deterministically.
        with open(path, "rb") as f:
            data = f.read()

    if path.lower().endswith(".pdf"):
        pdfreader = PdfReader(stream=data)
        return [
            pdfreader.get_page_image(pageno=p_no)
            for p_no in range(pdfreader._inpdf.page_count)
        ]

    image = Image.open(BytesIO(data))
    return [image.convert("RGB")]
182
+
183
def process_path(path):
    """Load the document at ``path`` and build the matching UI updates.

    The returned tuple has exactly six items, one per output component the
    listeners declare:
    ``[document, image, img_clear_button, output, output_text, url_error]``.

    Args:
        path: Local path or URL of the document; may be empty or ``None``.

    Returns:
        On success: the path, a visible gallery update holding the loaded
        pages, a visible clear-button update, hidden and cleared updates for
        the two answer components, and ``None`` for the error box.
        Otherwise: ``None`` for the document, hidden updates for the other
        components, and either a visible error update carrying the exception
        text or ``None`` when no path was given.
    """
    error = None
    if path:
        try:
            pages = load_document(path)
            return (
                path,
                gr.update(visible=True, value=pages),
                gr.update(visible=True),
                gr.update(visible=False, value=None),
                gr.update(visible=False, value=None),
                None,
            )
        except Exception as e:
            # Surface the failure in the UI instead of crashing the handler.
            traceback.print_exc()
            error = str(e)
    # Six values, not seven: a trailing extra value here did not match the
    # six declared outputs.
    return (
        None,
        gr.update(visible=False, value=None),
        gr.update(visible=False),
        gr.update(visible=False, value=None),
        gr.update(visible=False, value=None),
        gr.update(visible=True, value=error) if error is not None else None,
    )
208
+
209
+
210
def process_upload(file):
    """Handle a change of the upload widget.

    Args:
        file: The uploaded file object (its ``name`` attribute is used as the
            path), or a falsy value when the upload was cleared.

    Returns:
        The result of ``process_path`` for the uploaded file, or a six-item
        tuple that resets the document and hides the dependent components
        when there is no file.
    """
    if not file:
        hidden_and_cleared = gr.update(visible=False, value=None)
        hidden_button = gr.update(visible=False)
        return (
            None,
            hidden_and_cleared,
            hidden_button,
            gr.update(visible=False, value=None),
            gr.update(visible=False, value=None),
            None,
        )
    return process_path(file.name)
222
+
223
+
224
def process_prompt(prompt, document, lang="ch"):
    """Send a prompt about ``document`` to the DocPrompt service.

    Args:
        prompt: The question to ask. A falsy value is replaced by the
            default invoice-number prompt.
        document: The document reference sent as ``"doc"`` in the request
            body. ``None`` means no document is loaded.
        lang: Value sent as ``"lang"`` in the request body.

    Returns:
        ``(None, None, None)`` when no document is loaded. Otherwise three
        Gradio updates: the page images decoded from the response, the raw
        ``result`` predictions, and the top answer text (empty when the
        service returned no answer).

    Raises:
        KeyError: The ``token`` environment variable is not set, or the
            response lacks the ``result`` / ``image`` fields.
        requests.RequestException: The request failed, timed out or
            returned an HTTP error status.
    """
    if not prompt:
        prompt = "发票号码是多少?"
    if document is None:
        return None, None, None

    access_token = os.environ['token']

    url = f"https://aip.baidubce.com/rpc/2.0/nlp-itec/poc/docprompt?access_token={access_token}"

    # Bound the wait so a stalled service cannot hang the handler, and fail
    # loudly on HTTP errors instead of trying to parse an error page.
    r = requests.post(
        url,
        json={"doc": document, "prompt": [prompt], "lang": lang},
        timeout=60,
    )
    r.raise_for_status()
    response = r.json()

    predictions = response['result']
    img_list = response['image']
    pages = [Image.open(BytesIO(base64.b64decode(img))) for img in img_list]

    # The service may return no prediction for a prompt; show an empty
    # answer rather than raising from the nested lookup.
    try:
        text_value = predictions[0]['result'][0]['value']
    except (IndexError, KeyError, TypeError):
        text_value = ""

    return (
        gr.update(visible=True, value=pages),
        gr.update(visible=True, value=predictions),
        gr.update(
            visible=True,
            value=text_value,
        ),
    )
251
+
252
+
253
def load_example_document(img, prompt):
    """Run the prompt that belongs to a selected example.

    Args:
        img: Value of the hidden example image component; ``None`` when no
            example is selected.
        prompt: The example prompt, used as the key into ``prompt_files``.

    Returns:
        Six values for ``[document, prompt, image, img_clear_button, output,
        output_text]``: the example's file name, the prompt, the three
        results of ``process_prompt`` and a visible clear-button update, or
        all-``None`` values with a hidden clear button when ``img`` is
        ``None``.
    """
    if img is None:
        return None, None, None, gr.update(visible=False), None, None

    document = prompt_files[prompt]
    preview, answer, answer_text = process_prompt(prompt, document)
    show_clear_button = gr.update(visible=True)
    return document, prompt, preview, show_clear_button, answer, answer_text
260
+
261
+
262
def read_content(file_path: str) -> str:
    """Return the full text of ``file_path``, decoded as UTF-8."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read()
269
+
270
+
271
+ # Stylesheet handed to gr.Blocks(css=CSS). The id selectors #url-textbox,
+ # #short-upload-box, #select-a-file, #file-clear, #submit-button, #answer and
+ # #url-error correspond to elem_id values set on components in the layout.
+ # NOTE(review): no component in this file sets elem_id="prompt", so the
+ # `#prompt input` rule appears to match nothing - confirm.
+ CSS = """
272
+ #prompt input {
273
+ font-size: 16px;
274
+ }
275
+ #url-textbox {
276
+ padding: 0 !important;
277
+ }
278
+ #short-upload-box .w-full {
279
+ min-height: 10rem !important;
280
+ }
281
+ /* I think something like this can be used to re-shape
282
+ * the table
283
+ */
284
+ /*
285
+ .gr-samples-table tr {
286
+ display: inline;
287
+ }
288
+ .gr-samples-table .p-2 {
289
+ width: 100px;
290
+ }
291
+ */
292
+ #select-a-file {
293
+ width: 100%;
294
+ }
295
+ #file-clear {
296
+ padding-top: 2px !important;
297
+ padding-bottom: 2px !important;
298
+ padding-left: 8px !important;
299
+ padding-right: 8px !important;
300
+ margin-top: 10px;
301
+ }
302
+ .gradio-container .gr-button-primary {
303
+ background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%);
304
+ border: 1px solid #B0DCCC;
305
+ border-radius: 8px;
306
+ color: #1B8700;
307
+ }
308
+ .gradio-container.dark button#submit-button {
309
+ background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%);
310
+ border: 1px solid #B0DCCC;
311
+ border-radius: 8px;
312
+ color: #1B8700
313
+ }
314
+ table.gr-samples-table tr td {
315
+ border: none;
316
+ outline: none;
317
+ }
318
+ table.gr-samples-table tr td:first-of-type {
319
+ width: 0%;
320
+ }
321
+ div#short-upload-box div.absolute {
322
+ display: none !important;
323
+ }
324
+ gradio-app > div > div > div > div.w-full > div, .gradio-app > div > div > div > div.w-full > div {
325
+ gap: 0px 2%;
326
+ }
327
+ gradio-app div div div div.w-full, .gradio-app div div div div.w-full {
328
+ gap: 0px;
329
+ }
330
+ gradio-app h2, .gradio-app h2 {
331
+ padding-top: 10px;
332
+ }
333
+ #answer {
334
+ overflow-y: scroll;
335
+ color: white;
336
+ background: #666;
337
+ border-color: #666;
338
+ font-size: 20px;
339
+ font-weight: bold;
340
+ }
341
+ #answer span {
342
+ color: white;
343
+ }
344
+ #answer textarea {
345
+ color:white;
346
+ background: #777;
347
+ border-color: #777;
348
+ font-size: 18px;
349
+ }
350
+ #url-error input {
351
+ color: red;
352
+ }
353
+ """
354
+
355
# Build the Gradio UI: a file-selection column, a request column, and the
# event wiring that connects them to the handlers defined above.
with gr.Blocks(css=CSS) as demo:
    gr.HTML(read_content("header.html"))
    gr.Markdown(
        " ⚡DocPrompt⚡ is a Document Prompt Engine Powered by Baidu Wenxin Document Intelligence. 🚀\n"
        " To use it, simply upload an image or PDF, type the prompt, and click 'submit', or "
        " click one of the examples to load them."
    )

    # Holds the currently loaded document reference between events.
    document = gr.Variable()
    # Hidden components that gr.Examples fills in when an example is clicked.
    example_prompt = gr.Textbox(visible=False)
    example_image = gr.Image(visible=False)
    with gr.Row(equal_height=True):
        with gr.Column():
            with gr.Row():
                gr.Markdown("## 1. Select a file", elem_id="select-a-file")
                img_clear_button = gr.Button(
                    "Clear", variant="secondary", elem_id="file-clear", visible=False
                )
            image = gr.Gallery(visible=False)
            with gr.Row(equal_height=True):
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(
                            show_label=False,
                            placeholder="URL",
                            lines=1,
                            max_lines=1,
                            elem_id="url-textbox",
                        )
                        submit = gr.Button("Get")
                    url_error = gr.Textbox(
                        visible=False,
                        elem_id="url-error",
                        max_lines=1,
                        interactive=False,
                        label="Error",
                    )
            gr.Markdown("— or —")
            upload = gr.File(label=None, interactive=True, elem_id="short-upload-box")
            gr.Examples(
                examples=examples,
                inputs=[example_image, example_prompt],
            )

        with gr.Column() as col:
            gr.Markdown("## 2. Make a request")
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="发票号码是多少?",
                lines=1,
                max_lines=1,
            )
            ocr_lang = gr.Radio(
                choices=["ch", "en"],
                value="ch",
                label="OCR Language",
            )

            with gr.Row():
                clear_button = gr.Button("Clear", variant="secondary")
                submit_button = gr.Button(
                    "Submit", variant="primary", elem_id="submit-button"
                )
            with gr.Column():
                output_text = gr.Textbox(
                    label="Top Answer", visible=False, elem_id="answer"
                )
                output = gr.JSON(label="Output", visible=False)

    # Both "Clear" buttons reset every component; the ten returned values
    # line up one-to-one with the ten outputs listed below.
    for cb in [img_clear_button, clear_button]:
        cb.click(
            lambda _: (
                gr.update(visible=False, value=None),
                None,
                gr.update(visible=False, value=None),
                gr.update(visible=False, value=None),
                gr.update(visible=False),
                None,
                None,
                None,
                gr.update(visible=False, value=None),
                None,
            ),
            inputs=clear_button,
            outputs=[
                image,
                document,
                output,
                output_text,
                img_clear_button,
                example_image,
                upload,
                url,
                url_error,
                prompt,
            ],
        )

    # Loading a document, either by upload or by URL.
    upload.change(
        fn=process_upload,
        inputs=[upload],
        outputs=[document, image, img_clear_button, output, output_text, url_error],
    )
    submit.click(
        fn=process_path,
        inputs=[url],
        outputs=[document, image, img_clear_button, output, output_text, url_error],
    )

    # Pressing Enter in the prompt box, clicking Submit and switching the OCR
    # language all run the same request. Every trigger passes ocr_lang, so
    # Enter no longer silently falls back to the default language.
    for trigger in (prompt.submit, submit_button.click, ocr_lang.change):
        trigger(
            fn=process_prompt,
            inputs=[prompt, document, ocr_lang],
            outputs=[image, output, output_text],
        )

    example_image.change(
        fn=load_example_document,
        inputs=[example_image, example_prompt],
        outputs=[document, prompt, image, img_clear_button, output, output_text],
    )

    gr.Image('./paddlenlp-preview.jpeg')
    gr.Markdown("[![Stargazers repo roster for @PaddlePaddle/PaddleNLP](https://reporoster.com/stars/PaddlePaddle/PaddleNLP)](https://github.com/PaddlePaddle/PaddleNLP)")
    gr.HTML(read_content("footer.html"))
491
+
492
+
493
+ # Start the Gradio server only when this file is executed as a script; queueing is explicitly disabled.
+ if __name__ == "__main__":
494
+ demo.launch(enable_queue=False)
budget_form.png ADDED

Git LFS Details

  • SHA256: 77d90cf3568457853e8aa5e3e575266067a6d048c73388e751e5df562955e45e
  • Pointer size: 131 Bytes
  • Size of remote file: 386 kB
custom_declaration_form.png ADDED

Git LFS Details

  • SHA256: 0b7384eba50d2f817d8953d741f1b877e281142ff227771d7b16964a56e5c687
  • Pointer size: 131 Bytes
  • Size of remote file: 573 kB
footer.html ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <div class="footer">
2
+ <p>Model by <a href="https://github.com/PaddlePaddle/PaddleNLP" style="text-decoration: underline;" target="_blank">PaddleNLP</a> - Gradio Demo by 🤗 Hugging Face
3
+ </p>
4
+ </div>
header.html ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div style="text-align: center; max-width: 650px; margin: 0 auto;">
2
+ <div
3
+ style="
4
+ display: inline-flex;
5
+ gap: 0.8rem;
6
+ font-size: 1.75rem;
7
+ margin-bottom: 10px;
8
+ margin-left: 220px;
9
+ justify-content: center;
10
+ "
11
+ >
12
+ <a href="https://github.com/PaddlePaddle/PaddleNLP"><img src="https://user-images.githubusercontent.com/1371212/175816733-8ec25eb0-9af3-4380-9218-27c154518258.png" alt="PaddleNLP" width="60%"></a>
13
+ </div>
14
+ <div
15
+ style="
16
+ display: inline-flex;
17
+ align-items: center;
18
+ gap: 0.8rem;
19
+ font-size: 1.75rem;
20
+ margin-bottom: 10px;
21
+ justify-content: center;
22
+ ">
23
+ <a href="https://github.com/PaddlePaddle/PaddleNLP"><h1 style="font-weight: 900; margin-bottom: 7px;">
24
+ DocPrompt 🚀
25
+ </h1></a>
26
+ </div>
27
+ <!-- <p style="margin-bottom: 10px; font-weight: 900; font-size: 100%">
28
+ ⚡DocPrompt⚡ is a Document Prompt Engine Powered by <br> Document Intelligence Technology in Baidu Wenxin<br>. 🚀 To use it, simply upload an image or PDF, type the prompt, and click 'submit', or click one of the examples to load them.
29
+ </p> -->
30
+ </div>
invoice.jpg ADDED
medical_bill_1.png ADDED

Git LFS Details

  • SHA256: 490c80d79d65ad86379402742e9d8c91b9c29cfca976d98a6831fca61e2a2dc1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
medical_bill_2.png ADDED

Git LFS Details

  • SHA256: ed386f307082a0c4573456a0b8ee2587bd455e59f08fcbba0c9bdc3d9c8ecccd
  • Pointer size: 132 Bytes
  • Size of remote file: 1.58 MB
paddlenlp-preview.jpeg ADDED

Git LFS Details

  • SHA256: 401f593de925041f45fb4acb63a9dc256770898e79a3b06a2bb3e2df81c092a2
  • Pointer size: 131 Bytes
  • Size of remote file: 118 kB
poster.png ADDED

Git LFS Details

  • SHA256: d68ae0e3d9933e130830fa5664742ad6f5f2411f945f94d3462e965f7e32628f
  • Pointer size: 131 Bytes
  • Size of remote file: 526 kB
receipt.png ADDED

Git LFS Details

  • SHA256: e18e469df76ff53a83a38c7e7180034d31ca857b81bddc211e9377e50ce79b37
  • Pointer size: 131 Bytes
  • Size of remote file: 567 kB
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fitz
2
+ numpy
3
+ opencv-python
4
+ pdf2image
5
+ requests
resume.png ADDED

Git LFS Details

  • SHA256: 7be8498397a59f6aedf3cbee96041aea96b5d8f1aa667cf1d3ac5e93a7716734
  • Pointer size: 131 Bytes
  • Size of remote file: 191 kB
website_design_guide.jpeg ADDED

Git LFS Details

  • SHA256: 7bf1f5c4f1b58416db6335f13ea7d6c1d78ab7328ce8209766b333308474aeef
  • Pointer size: 132 Bytes
  • Size of remote file: 1.23 MB