Raj-Master committed on
Commit
23460f1
·
verified ·
1 Parent(s): f6599ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -43
app.py CHANGED
@@ -1,12 +1,12 @@
1
- import gradio as gr
 
 
 
2
  from pdf2image import convert_from_path
3
  from paddleocr import PaddleOCR
4
- from numpy import asarray
5
- import gradio as gr
6
  from gradio_pdf import PDF
7
- from pdf2image import convert_from_path
8
- from transformers import pipeline
9
- from pathlib import Path
10
 
11
  ocr = PaddleOCR(use_angle_cls=True, lang="ch", ocr_version="PP-OCRv3")
12
 
@@ -16,38 +16,6 @@ def p(image,question):
16
 
17
  return ocr_text
18
 
19
- # up_file="/home/raj/Downloads/ICBC Aviation Leasing Company Limited_ND2A_220808.pdf"
20
- # images = convert_from_path(up_file, fmt="jpeg")
21
-
22
- # output = ""
23
- # for idx, image in enumerate(images, start=1):
24
- # # result = reader.readtext(image, detail = 0)
25
- # # ocr_text = " ".join(result)
26
- # result = ocr.ocr(asarray(image), cls=True)
27
- # ocr_text = " ".join([line[1][0] for line in result[0]])
28
-
29
- # new_prompt = f"""
30
- # {ocr_text}
31
-
32
- # Above is OCR'ed text from a form PDF file.
33
-
34
- # List out all the form key value which have data. Don't include fields that are empty.
35
- # """
36
- # # llm_output = llm(prompt=new_prompt)
37
-
38
- # # output += f"Page {idx}\n"
39
- # # output += llm_output
40
- # output += "\n\n "
41
-
42
- # output=new_prompt
43
- # print(output)
44
-
45
- dir_ = Path(__file__).parent
46
-
47
- # p = pipeline(
48
- # "document-question-answering",
49
- # model="impira/layoutlm-document-qa",
50
- # )
51
 
52
  def qa(question: str, doc: str) -> str:
53
  output=""
@@ -65,9 +33,4 @@ demo = gr.Interface(
65
  )
66
 
67
  demo.launch()
68
- # def greet(name):
69
- # return "Hello " + name + "!!"
70
-
71
- # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
72
- # iface.launch()
73
 
 
1
+ import os
2
+ os.system('pip install paddlepaddle>=2.0.1')
3
+ # os.system('pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html')
4
+ os.system('pip install paddleocr')import gradio as gr
5
  from pdf2image import convert_from_path
6
  from paddleocr import PaddleOCR
 
 
7
  from gradio_pdf import PDF
8
+
9
+
 
10
 
11
  ocr = PaddleOCR(use_angle_cls=True, lang="ch", ocr_version="PP-OCRv3")
12
 
 
16
 
17
  return ocr_text
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def qa(question: str, doc: str) -> str:
21
  output=""
 
33
  )
34
 
35
  demo.launch()
 
 
 
 
 
36