Spaces:

Soumen
/

Text-Summarization-and-NLP-tasks

Sleeping

Soumen committed on Nov 25, 2022

Commit

6c29a84

1 Parent(s): 8604911

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -49,13 +49,12 @@ def read_pdf(file):
 	count = pdfReader.numPages
 	all_page_text = ""
 	for i in range(count):
-      page = pdfReader.getPage(i)
-      image_name = "Page_" + str(i) + ".jpg"
-      page.save(image_name, "JPEG")
-      text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
-      all_page_text += text + " " #page.extractText()
     return all_page_text
 #def read_pdf_with_pdfplumber(file):
 #	with pdfplumber.open(file) as pdf:
 #	    page = pdf.pages[0]

 	count = pdfReader.numPages
 	all_page_text = ""
 	for i in range(count):
+        page = pdfReader.getPage(i)
+        image_name = "Page_" + str(i) + ".jpg"
+        page.save(image_name, "JPEG")
+        text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
+        all_page_text += text + " " #page.extractText()
     return all_page_text
 #def read_pdf_with_pdfplumber(file):
 #	with pdfplumber.open(file) as pdf:
 #	    page = pdf.pages[0]