Raj-Master committed on
Commit
5a3dc06
·
verified ·
1 Parent(s): e7bba61

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +73 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pdf2image import convert_from_path
3
+ from paddleocr import PaddleOCR
4
+ from numpy import asarray
5
+ import gradio as gr
6
+ from gradio_pdf import PDF
7
+ from pdf2image import convert_from_path
8
+ from transformers import pipeline
9
+ from pathlib import Path
10
+
11
# Single OCR engine created once at import time and reused by every call to
# ``p``; the settings below are passed to PaddleOCR unchanged.
ocr = PaddleOCR(
    use_angle_cls=True,
    lang="ch",
    ocr_version="PP-OCRv3",
)
12
+
13
def p(image, question):
    """Run OCR on a single page image and return its recognised text.

    Args:
        image: Page image; it is converted with ``numpy.asarray`` before
            being handed to the module-level ``ocr`` engine.
        question: Not used by this function; kept so that existing callers
            which pass it continue to work.

    Returns:
        The recognised text lines joined with single spaces, or an empty
        string when OCR finds no text on the page.
    """
    result = ocr.ocr(asarray(image), cls=True)

    # PaddleOCR reports a page without any detected text as ``None`` (or an
    # empty list) rather than a list of lines; iterating it would raise.
    page_lines = result[0] if result else None
    if not page_lines:
        return ""

    # ``line[1][0]`` is the text of one detected line (per the PaddleOCR
    # result layout the original code already relied on).
    return " ".join(line[1][0] for line in page_lines)
18
+
19
+ # up_file="/home/raj/Downloads/ICBC Aviation Leasing Company Limited_ND2A_220808.pdf"
20
+ # images = convert_from_path(up_file, fmt="jpeg")
21
+
22
+ # output = ""
23
+ # for idx, image in enumerate(images, start=1):
24
+ # # result = reader.readtext(image, detail = 0)
25
+ # # ocr_text = " ".join(result)
26
+ # result = ocr.ocr(asarray(image), cls=True)
27
+ # ocr_text = " ".join([line[1][0] for line in result[0]])
28
+
29
+ # new_prompt = f"""
30
+ # {ocr_text}
31
+
32
+ # Above is OCR'ed text from a form PDF file.
33
+
34
+ # List out all the form key value which have data. Don't include fields that are empty.
35
+ # """
36
+ # # llm_output = llm(prompt=new_prompt)
37
+
38
+ # # output += f"Page {idx}\n"
39
+ # # output += llm_output
40
+ # output += "\n\n "
41
+
42
+ # output=new_prompt
43
+ # print(output)
44
+
45
# Directory that contains this script.
dir_ = Path(__file__).parents[0]
46
+
47
+ # p = pipeline(
48
+ # "document-question-answering",
49
+ # model="impira/layoutlm-document-qa",
50
+ # )
51
+
52
def qa(question: str, doc: str) -> str:
    """OCR every page of a PDF and return the combined text.

    Args:
        question: Forwarded unchanged to ``p`` for each page.
        doc: Filesystem path of the PDF to read. May be empty or ``None``
            when the form is submitted without a document.

    Returns:
        The OCR text of each page in page order, separated by newlines.
        An empty string is returned when no document was supplied.
    """
    # Without a document there is nothing to convert; return early instead
    # of letting ``convert_from_path`` fail on a missing path.
    if not doc:
        return ""

    pages = convert_from_path(doc)
    # Join once rather than growing a string page by page; this also avoids
    # the stray leading newline the accumulated form produced.
    return "\n".join(p(page, question) for page in pages)
59
+
60
+
61
# Web UI: a question text box and a PDF upload are passed to ``qa``; the
# string it returns is displayed in a text box.
demo = gr.Interface(
    fn=qa,
    inputs=[
        gr.Textbox(label="Question"),
        PDF(label="Document"),
    ],
    outputs=gr.Textbox(),
)

demo.launch()
68
+ # def greet(name):
69
+ # return "Hello " + name + "!!"
70
+
71
+ # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
72
+ # iface.launch()
73
+
requirements.txt ADDED
File without changes