Soumen committed on
Commit
dcd8793
·
1 Parent(s): c2639e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -30
app.py CHANGED
@@ -48,24 +48,9 @@ def read_pdf(file):
48
  all_page_text = " "
49
  for i in range(count):
50
  page = pdfReader.getPage(i)
51
- # img = Image.open(page)
52
- # img = Image.open(page)
53
- # img = img.save("img.png")
54
- # image_name = cv2.imread("img.png")
55
- # # get co-ordinates to cr
56
- # text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
57
  all_page_text += page.extractText()+" "
58
  return all_page_text
59
- # def read_pdf_with_pdfplumber(file):
60
- # # Open the uploaded PDF file with pdfplumber
61
- # with pdfplumber.open(file) as pdf:
62
- # extracted_text = ''
63
- # for page in pdf.pages:
64
- # extracted_text += page.extract_text()
65
 
66
- # # Display the extracted text
67
- # #st.text(extracted_text)
68
- # return extracted_text
69
  def engsum(output):
70
  def query(payload):
71
  response = requests.post(API_URL1, headers=headers1, json=payload)
@@ -121,18 +106,6 @@ def main():
121
  img = Image.open(uploaded_photo)
122
  img = img.save("img.png")
123
  img = cv2.imread("img.png")
124
- # get co-ordinates to crop the image
125
- #imag, lc = line_cor.mark_region(imge)
126
- #st.success(*lc)
127
- # c = lc
128
- # cropping image img = image[y0:y1, x0:x1]
129
- #imgg = imge[c[0][1]:c[1][1], c[0][0]:c[1][0]]
130
- #plt.figure(figsize=(10,10))
131
- # plt.imshow(img)
132
- # convert the image to black and white for better OCR
133
- #ret,thresh1 = cv2.threshold(imge,120,255,cv2.THRESH_BINARY)
134
- # pytesseract image to string to get results
135
- #text = str(pytesseract.image_to_string(img, config='--psm 6',lang="ben")) if st.checkbox("Bangla") else str(pytesseract.image_to_string(thresh1, config='--psm 6'))
136
  if st.checkbox("Bangla"):
137
  text = pytesseract.image_to_string(img, lang="ben")
138
  else:
@@ -149,8 +122,6 @@ def main():
149
  text=pytesseract.image_to_string(img)
150
  #st.success(text)
151
  elif uploaded_photo==None and camera_photo==None:
152
- #our_image=load_image("image.jpg")
153
- #img = cv2.imread("scholarly_text.jpg")
154
  text = message
155
 
156
  if st.checkbox("English Text Generation"):
@@ -167,7 +138,7 @@ def main():
167
  st.success(text_output)
168
 
169
  if st.checkbox("Mark for Text Summarization"):
170
- if st.checkbox("Bangla")
171
  bansum(text)
172
  else:
173
  engsum(text)
 
48
  all_page_text = " "
49
  for i in range(count):
50
  page = pdfReader.getPage(i)
 
 
 
 
 
 
51
  all_page_text += page.extractText()+" "
52
  return all_page_text
 
 
 
 
 
 
53
 
 
 
 
54
  def engsum(output):
55
  def query(payload):
56
  response = requests.post(API_URL1, headers=headers1, json=payload)
 
106
  img = Image.open(uploaded_photo)
107
  img = img.save("img.png")
108
  img = cv2.imread("img.png")
 
 
 
 
 
 
 
 
 
 
 
 
109
  if st.checkbox("Bangla"):
110
  text = pytesseract.image_to_string(img, lang="ben")
111
  else:
 
122
  text=pytesseract.image_to_string(img)
123
  #st.success(text)
124
  elif uploaded_photo==None and camera_photo==None:
 
 
125
  text = message
126
 
127
  if st.checkbox("English Text Generation"):
 
138
  st.success(text_output)
139
 
140
  if st.checkbox("Mark for Text Summarization"):
141
+ if st.checkbox("Bangla"):
142
  bansum(text)
143
  else:
144
  engsum(text)