Soumen committed on
Commit
42268bf
·
1 Parent(s): 82f9c58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -51,11 +51,12 @@ def read_pdf(file):
51
  #pdfReader = PdfFileReader(file)
52
  #count = pdfReader.numPages
53
  all_page_text = ""
54
- for i in range(len(images)):
55
  #page = pdfReader.getPage(i)
56
- img=images[i]
57
- image_name = "img_" + str(i) + ".jpg"
58
- img.save(image_name, "JPEG")
 
59
  text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
60
  all_page_text += text + " " #page.extractText()
61
  return all_page_text
 
51
  #pdfReader = PdfFileReader(file)
52
  #count = pdfReader.numPages
53
  all_page_text = ""
54
+ for im in images:
55
  #page = pdfReader.getPage(i)
56
+ img = Image.open(im)
57
+ img = img.save("img.png")
58
+ image_name = cv2.imread("img.png")
59
+ # get co-ordinates to cr
60
  text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
61
  all_page_text += text + " " #page.extractText()
62
  return all_page_text