Spaces:

Soumen
/

Text-Summarization-and-NLP-tasks

Sleeping

Soumen committed on Nov 25, 2022

Commit

f0ec4df

1 Parent(s): bb3614e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -64,14 +64,14 @@ from PIL import Image
 #         all_page_text += text + " " #page.extractText()
 #     return all_page_text
 def read_pdf_with_pdfplumber(file):
-    all_page_text=""
     with pdfplumber.open(file) as pdf:
 	    page = pdf.pages[0]
         #return page.extract_text()
         # get co-ordinates to cr
         #img = Image.open(im)
-        img = page.save("img.png")
-        image_name = cv2.imread("img.png")
 ## get co-ordinates to cr
     text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
     all_page_text += text + " " #page.extractText()

 #         all_page_text += text + " " #page.extractText()
 #     return all_page_text
 def read_pdf_with_pdfplumber(file):
+    all_page_text=" "
     with pdfplumber.open(file) as pdf:
 	    page = pdf.pages[0]
         #return page.extract_text()
         # get co-ordinates to cr
         #img = Image.open(im)
+    img = page.save("img.png")
+    image_name = cv2.imread("img.png")
 ## get co-ordinates to cr
     text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
     all_page_text += text + " " #page.extractText()