Spaces:

Soumen
/

Text-Summarization-and-NLP-tasks

Sleeping

App Files Files Community

Soumen committed on Sep 8, 2023

Commit

dcd8793

1 Parent(s): c2639e9

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -30

app.py CHANGED Viewed

@@ -48,24 +48,9 @@ def read_pdf(file):
     all_page_text = " "
     for i in range(count):
         page = pdfReader.getPage(i)
-        # img = Image.open(page)
-        # img = Image.open(page)
-        # img = img.save("img.png")
-        # image_name = cv2.imread("img.png")
-        # # get co-ordinates to cr
-#         text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
         all_page_text += page.extractText()+" "
     return all_page_text
-# def read_pdf_with_pdfplumber(file):
-#     # Open the uploaded PDF file with pdfplumber
-#     with pdfplumber.open(file) as pdf:
-#         extracted_text = ''
-#         for page in pdf.pages:
-#             extracted_text += page.extract_text()
-#     # Display the extracted text
-#     #st.text(extracted_text)
-#     return extracted_text
 def engsum(output):
     def query(payload):
         response = requests.post(API_URL1, headers=headers1, json=payload)
@@ -121,18 +106,6 @@ def main():
             img = Image.open(uploaded_photo)
             img = img.save("img.png")
             img = cv2.imread("img.png")
-            # get co-ordinates to crop the image
-            #imag, lc = line_cor.mark_region(imge)
-            #st.success(*lc)
-           # c = lc
-            # cropping image img = image[y0:y1, x0:x1]
-            #imgg = imge[c[0][1]:c[1][1], c[0][0]:c[1][0]]
-            #plt.figure(figsize=(10,10))
-           # plt.imshow(img)
-            # convert the image to black and white for better OCR
-            #ret,thresh1 = cv2.threshold(imge,120,255,cv2.THRESH_BINARY)
-            # pytesseract image to string to get results
-            #text = str(pytesseract.image_to_string(img, config='--psm 6',lang="ben")) if st.checkbox("Bangla") else str(pytesseract.image_to_string(thresh1, config='--psm 6'))
             if st.checkbox("Bangla"):
                 text =  pytesseract.image_to_string(img, lang="ben")
             else:
@@ -149,8 +122,6 @@ def main():
                 text=pytesseract.image_to_string(img)
             #st.success(text)
         elif uploaded_photo==None and camera_photo==None:
-    	#our_image=load_image("image.jpg")
-        #img = cv2.imread("scholarly_text.jpg")
             text = message
         if st.checkbox("English Text Generation"):
@@ -167,7 +138,7 @@ def main():
                 st.success(text_output)
         if st.checkbox("Mark for Text Summarization"):
-            if st.checkbox("Bangla")
                 bansum(text)
             else:
                 engsum(text)

     all_page_text = " "
     for i in range(count):
         page = pdfReader.getPage(i)
         all_page_text += page.extractText()+" "
     return all_page_text
 def engsum(output):
     def query(payload):
         response = requests.post(API_URL1, headers=headers1, json=payload)
             img = Image.open(uploaded_photo)
             img = img.save("img.png")
             img = cv2.imread("img.png")
             if st.checkbox("Bangla"):
                 text =  pytesseract.image_to_string(img, lang="ben")
             else:
                 text=pytesseract.image_to_string(img)
             #st.success(text)
         elif uploaded_photo==None and camera_photo==None:
             text = message
         if st.checkbox("English Text Generation"):
                 st.success(text_output)
         if st.checkbox("Mark for Text Summarization"):
+            if st.checkbox("Bangla"):
                 bansum(text)
             else:
                 engsum(text)