Update app.py
Browse files
app.py
CHANGED
@@ -49,13 +49,12 @@ def read_pdf(file):
|
|
49 |
count = pdfReader.numPages
|
50 |
all_page_text = ""
|
51 |
for i in range(count):
|
52 |
-
|
53 |
image_name = "Page_" + str(i) + ".jpg"
|
54 |
-
page.save(image_name, "JPEG")
|
55 |
text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
56 |
-
|
57 |
-
|
58 |
-
return all_page_text
|
59 |
|
60 |
#def read_pdf_with_pdfplumber(file):
|
61 |
# with pdfplumber.open(file) as pdf:
|
|
|
49 |
count = pdfReader.numPages
|
50 |
all_page_text = ""
|
51 |
for i in range(count):
|
52 |
+
page = pdfReader.getPage(i)
|
53 |
image_name = "Page_" + str(i) + ".jpg"
|
54 |
+
page.save(image_name, "JPEG")
|
55 |
text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
56 |
+
all_page_text += text + " " #page.extractText()
|
57 |
+
return all_page_text
|
|
|
58 |
|
59 |
#def read_pdf_with_pdfplumber(file):
|
60 |
# with pdfplumber.open(file) as pdf:
|