Omnibus committed on
Commit
7754df6
·
1 Parent(s): 9c777f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -15
app.py CHANGED
@@ -67,36 +67,22 @@ ocr_id = {
67
  def pdf_pil(file_path,page_num):
68
 
69
  pdf = pdfium.PdfDocument("data.pdf")
70
- print ("\n PDF read !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
71
- #n_pages = len(pdf)
72
- #for page_number in range(n_pages):
73
  page = pdf.get_page(int(page_num)-1)
74
- print ("\n Page read !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
75
-
76
  bitmap = page.render(
77
  scale = 1, # 72dpi resolution
78
  rotation = 0, # no additional rotation
79
  # ... further rendering options
80
  )
81
- print ("\n Page rendered !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
82
-
83
  pil_image = bitmap.to_pil()
84
- print ("\n Page to PIL !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
85
-
86
  pil_image.save(f"image_{page_num}.png")
87
- print ("\n Page saved !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
88
 
89
  return (f"image_{page_num}.png")
90
 
91
  def ocrpdf(file_path,pdf_lang,page_num):
92
  img1 = pdf_pil(file_path,page_num)
93
- print("DONE 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
94
  lang=[f"{ocr_id[pdf_lang]}"]
95
- print("DONE 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
96
  reader = easyocr.Reader(lang)
97
- print("DONE 3 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
98
- bounds = reader.readtext(img1)
99
- print("DONE 4 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
100
 
101
  this = ""
102
  for bound in bounds:
 
67
  def pdf_pil(file_path,page_num):
68
 
69
  pdf = pdfium.PdfDocument("data.pdf")
 
 
 
70
  page = pdf.get_page(int(page_num)-1)
 
 
71
  bitmap = page.render(
72
  scale = 1, # 72dpi resolution
73
  rotation = 0, # no additional rotation
74
  # ... further rendering options
75
  )
 
 
76
  pil_image = bitmap.to_pil()
 
 
77
  pil_image.save(f"image_{page_num}.png")
 
78
 
79
  return (f"image_{page_num}.png")
80
 
81
  def ocrpdf(file_path,pdf_lang,page_num):
82
  img1 = pdf_pil(file_path,page_num)
 
83
  lang=[f"{ocr_id[pdf_lang]}"]
 
84
  reader = easyocr.Reader(lang)
85
+ bounds = reader.readtext(img1,detail=1)
 
 
86
 
87
  this = ""
88
  for bound in bounds: