Update app.py
Browse files
app.py
CHANGED
@@ -48,24 +48,9 @@ def read_pdf(file):
|
|
48 |
all_page_text = " "
|
49 |
for i in range(count):
|
50 |
page = pdfReader.getPage(i)
|
51 |
-
# img = Image.open(page)
|
52 |
-
# img = Image.open(page)
|
53 |
-
# img = img.save("img.png")
|
54 |
-
# image_name = cv2.imread("img.png")
|
55 |
-
# # get co-ordinates to cr
|
56 |
-
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
57 |
all_page_text += page.extractText()+" "
|
58 |
return all_page_text
|
59 |
-
# def read_pdf_with_pdfplumber(file):
|
60 |
-
# # Open the uploaded PDF file with pdfplumber
|
61 |
-
# with pdfplumber.open(file) as pdf:
|
62 |
-
# extracted_text = ''
|
63 |
-
# for page in pdf.pages:
|
64 |
-
# extracted_text += page.extract_text()
|
65 |
|
66 |
-
# # Display the extracted text
|
67 |
-
# #st.text(extracted_text)
|
68 |
-
# return extracted_text
|
69 |
def engsum(output):
|
70 |
def query(payload):
|
71 |
response = requests.post(API_URL1, headers=headers1, json=payload)
|
@@ -121,18 +106,6 @@ def main():
|
|
121 |
img = Image.open(uploaded_photo)
|
122 |
img = img.save("img.png")
|
123 |
img = cv2.imread("img.png")
|
124 |
-
# get co-ordinates to crop the image
|
125 |
-
#imag, lc = line_cor.mark_region(imge)
|
126 |
-
#st.success(*lc)
|
127 |
-
# c = lc
|
128 |
-
# cropping image img = image[y0:y1, x0:x1]
|
129 |
-
#imgg = imge[c[0][1]:c[1][1], c[0][0]:c[1][0]]
|
130 |
-
#plt.figure(figsize=(10,10))
|
131 |
-
# plt.imshow(img)
|
132 |
-
# convert the image to black and white for better OCR
|
133 |
-
#ret,thresh1 = cv2.threshold(imge,120,255,cv2.THRESH_BINARY)
|
134 |
-
# pytesseract image to string to get results
|
135 |
-
#text = str(pytesseract.image_to_string(img, config='--psm 6',lang="ben")) if st.checkbox("Bangla") else str(pytesseract.image_to_string(thresh1, config='--psm 6'))
|
136 |
if st.checkbox("Bangla"):
|
137 |
text = pytesseract.image_to_string(img, lang="ben")
|
138 |
else:
|
@@ -149,8 +122,6 @@ def main():
|
|
149 |
text=pytesseract.image_to_string(img)
|
150 |
#st.success(text)
|
151 |
elif uploaded_photo==None and camera_photo==None:
|
152 |
-
#our_image=load_image("image.jpg")
|
153 |
-
#img = cv2.imread("scholarly_text.jpg")
|
154 |
text = message
|
155 |
|
156 |
if st.checkbox("English Text Generation"):
|
@@ -167,7 +138,7 @@ def main():
|
|
167 |
st.success(text_output)
|
168 |
|
169 |
if st.checkbox("Mark for Text Summarization"):
|
170 |
-
if st.checkbox("Bangla")
|
171 |
bansum(text)
|
172 |
else:
|
173 |
engsum(text)
|
|
|
48 |
all_page_text = " "
|
49 |
for i in range(count):
|
50 |
page = pdfReader.getPage(i)
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
all_page_text += page.extractText()+" "
|
52 |
return all_page_text
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
|
|
|
|
|
|
54 |
def engsum(output):
|
55 |
def query(payload):
|
56 |
response = requests.post(API_URL1, headers=headers1, json=payload)
|
|
|
106 |
img = Image.open(uploaded_photo)
|
107 |
img = img.save("img.png")
|
108 |
img = cv2.imread("img.png")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
if st.checkbox("Bangla"):
|
110 |
text = pytesseract.image_to_string(img, lang="ben")
|
111 |
else:
|
|
|
122 |
text=pytesseract.image_to_string(img)
|
123 |
#st.success(text)
|
124 |
elif uploaded_photo==None and camera_photo==None:
|
|
|
|
|
125 |
text = message
|
126 |
|
127 |
if st.checkbox("English Text Generation"):
|
|
|
138 |
st.success(text_output)
|
139 |
|
140 |
if st.checkbox("Mark for Text Summarization"):
|
141 |
+
if st.checkbox("Bangla"):
|
142 |
bansum(text)
|
143 |
else:
|
144 |
engsum(text)
|