Omnibus committed on
Commit
bdb82f0
·
1 Parent(s): e8481be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -77,11 +77,11 @@ def pdf_pil(file_path,page_num):
77
 
78
  return (f"image_{page_num}.png")
79
 
80
- def ocrpdf(file_path,pdf_lang,page_num,sent_wid,):
81
  img1 = pdf_pil(file_path,page_num)
82
  lang=[f"{ocr_id[pdf_lang]}"]
83
  reader = easyocr.Reader(lang)
84
- bounds = reader.readtext(img1,width_ths=1)
85
 
86
  this = ""
87
  for bound in bounds:
@@ -97,7 +97,7 @@ def scrape(instring):
97
  </div>''')
98
  return gr.HTML.update(f'''{html_src}''')
99
 
100
- def scrape00(instring, page_num,pdf_lang):
101
  response = requests.get(instring, stream=True)
102
 
103
  if response.status_code == 200:
@@ -119,7 +119,7 @@ def scrape00(instring, page_num,pdf_lang):
119
  sum_out = summarizer(text)
120
  except Exception:
121
  try:
122
- text = ocrpdf("data.pdf",pdf_lang,page_num)
123
  sum_out = summarizer(text)
124
  except Exception:
125
  sum_out = "Error"
 
77
 
78
  return (f"image_{page_num}.png")
79
 
80
+ def ocrpdf(file_path,pdf_lang,page_num,sent_wid,contrast_det):
81
  img1 = pdf_pil(file_path,page_num)
82
  lang=[f"{ocr_id[pdf_lang]}"]
83
  reader = easyocr.Reader(lang)
84
+ bounds = reader.readtext(img1,width_ths=sent_wid,contrast_ths=contrast_det)
85
 
86
  this = ""
87
  for bound in bounds:
 
97
  </div>''')
98
  return gr.HTML.update(f'''{html_src}''')
99
 
100
+ def scrape00(instring, page_num,pdf_lang,sent_wid,contrast_det):
101
  response = requests.get(instring, stream=True)
102
 
103
  if response.status_code == 200:
 
119
  sum_out = summarizer(text)
120
  except Exception:
121
  try:
122
+ text = ocrpdf("data.pdf",pdf_lang,page_num,sent_wid,contrast_det)
123
  sum_out = summarizer(text)
124
  except Exception:
125
  sum_out = "Error"