Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -77,11 +77,11 @@ def pdf_pil(file_path,page_num):
|
|
77 |
|
78 |
return (f"image_{page_num}.png")
|
79 |
|
80 |
-
def ocrpdf(file_path,pdf_lang,page_num,sent_wid,):
|
81 |
img1 = pdf_pil(file_path,page_num)
|
82 |
lang=[f"{ocr_id[pdf_lang]}"]
|
83 |
reader = easyocr.Reader(lang)
|
84 |
-
bounds = reader.readtext(img1,width_ths=
|
85 |
|
86 |
this = ""
|
87 |
for bound in bounds:
|
@@ -97,7 +97,7 @@ def scrape(instring):
|
|
97 |
</div>''')
|
98 |
return gr.HTML.update(f'''{html_src}''')
|
99 |
|
100 |
-
def scrape00(instring, page_num,pdf_lang):
|
101 |
response = requests.get(instring, stream=True)
|
102 |
|
103 |
if response.status_code == 200:
|
@@ -119,7 +119,7 @@ def scrape00(instring, page_num,pdf_lang):
|
|
119 |
sum_out = summarizer(text)
|
120 |
except Exception:
|
121 |
try:
|
122 |
-
text = ocrpdf("data.pdf",pdf_lang,page_num)
|
123 |
sum_out = summarizer(text)
|
124 |
except Exception:
|
125 |
sum_out = "Error"
|
|
|
77 |
|
78 |
return (f"image_{page_num}.png")
|
79 |
|
80 |
+
def ocrpdf(file_path,pdf_lang,page_num,sent_wid,contrast_det):
|
81 |
img1 = pdf_pil(file_path,page_num)
|
82 |
lang=[f"{ocr_id[pdf_lang]}"]
|
83 |
reader = easyocr.Reader(lang)
|
84 |
+
bounds = reader.readtext(img1,width_ths=sent_wid,contrast_ths=contrast_det)
|
85 |
|
86 |
this = ""
|
87 |
for bound in bounds:
|
|
|
97 |
</div>''')
|
98 |
return gr.HTML.update(f'''{html_src}''')
|
99 |
|
100 |
+
def scrape00(instring, page_num,pdf_lang,sent_wid,contrast_det):
|
101 |
response = requests.get(instring, stream=True)
|
102 |
|
103 |
if response.status_code == 200:
|
|
|
119 |
sum_out = summarizer(text)
|
120 |
except Exception:
|
121 |
try:
|
122 |
+
text = ocrpdf("data.pdf",pdf_lang,page_num,sent_wid,contrast_det)
|
123 |
sum_out = summarizer(text)
|
124 |
except Exception:
|
125 |
sum_out = "Error"
|