Soumen committed on
Commit
6c29a84
·
1 Parent(s): 8604911

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -49,13 +49,12 @@ def read_pdf(file):
49
  count = pdfReader.numPages
50
  all_page_text = ""
51
  for i in range(count):
52
- page = pdfReader.getPage(i)
53
- image_name = "Page_" + str(i) + ".jpg"
54
- page.save(image_name, "JPEG")
55
- text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
56
- all_page_text += text + " " #page.extractText()
57
  return all_page_text
58
-
59
  #def read_pdf_with_pdfplumber(file):
60
  # with pdfplumber.open(file) as pdf:
61
  # page = pdf.pages[0]
 
49
  count = pdfReader.numPages
50
  all_page_text = ""
51
  for i in range(count):
52
+ page = pdfReader.getPage(i)
53
+ image_name = "Page_" + str(i) + ".jpg"
54
+ page.save(image_name, "JPEG")
55
+ text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
56
+ all_page_text += text + " " #page.extractText()
57
  return all_page_text
 
58
  #def read_pdf_with_pdfplumber(file):
59
  # with pdfplumber.open(file) as pdf:
60
  # page = pdf.pages[0]