Spaces:

Omnibus
/

pdf-reader

Sleeping

pdf-reader / app.py

Update app.py

4aeb6eb almost 2 years ago

1.7 kB

	import gradio as gr
	from bs4 import BeautifulSoup as bs
	from pypdf import PdfReader

	#import html5lib
	#import copy
	import requests
	#from IPython.display import IFrame

	def scrape(instring):
	    """Return an HTML update that embeds a fixed PDF hosted on w3.org.

	    ``instring`` is accepted for signature compatibility but is not read;
	    the markup is the same on every call.
	    """
	    sample_viewer = '''<object data="https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" width="100%" height="500px"></object>'''
	    return gr.HTML.update(sample_viewer)

	def scrape1(instring):
	    """Return an HTML update that embeds the document at ``instring``.

	    Args:
	        instring: URL of the PDF to show in the ``<object>`` viewer.

	    Returns:
	        The result of ``gr.HTML.update`` called with the viewer markup.

	    Raises:
	        requests.exceptions.RequestException: If the URL is malformed,
	            unreachable, or does not respond within the timeout.
	    """
	    from html import escape

	    url = str(instring)

	    # The response body is not needed to build the markup. The request is
	    # kept so that a malformed or unreachable URL still fails here, as it
	    # did before; the timeout stops a stalled server from blocking the
	    # handler indefinitely.
	    requests.get(url, timeout=30)

	    # Quote and escape the URL: left bare, a space or ``>`` inside it would
	    # end the attribute early and the remainder would be parsed as markup.
	    safe_url = escape(url, quote=True)
	    return gr.HTML.update(
	        f'<object data="{safe_url}" type="application/pdf" width="100%" height="500px"></object>'
	    )
	def scrape0(instring):
	    """Download the document at ``instring`` to ``metadata.pdf`` and return viewer markup.

	    Args:
	        instring: URL of the PDF to download.

	    Returns:
	        The result of ``gr.HTML.update`` called with an ``<object>`` tag whose
	        ``data`` attribute is ``metadata.pdf``.

	    Raises:
	        requests.exceptions.RequestException: If the request fails, times
	            out, or the server answers with an error status.
	    """
	    chunk_size = 2000
	    url = str(instring)

	    # A single streamed request is enough. The body is written straight to
	    # disk in chunks, so the document is never held in memory as a whole and
	    # is not downloaded a second time.
	    with requests.get(url, stream=True, timeout=30) as response:
	        # Check the status before opening the file so that an error page is
	        # never saved under the .pdf name.
	        response.raise_for_status()
	        with open('metadata.pdf', 'wb') as fd:
	            for chunk in response.iter_content(chunk_size):
	                fd.write(chunk)

	    # NOTE(review): the file name is fixed, so overlapping calls write to the
	    # same file -- confirm this is acceptable with the queue's concurrency.
	    # NOTE(review): the relative ``metadata.pdf`` URL only loads if the server
	    # exposes that path to the browser -- TODO confirm.
	    return gr.HTML.update(
	        '<object data="metadata.pdf" type="application/pdf" width="100%" height="500px"></object>'
	    )

	# Build the page: one text input, one button, one HTML output.
	with gr.Blocks() as app:
	    url_input = gr.Textbox()
	    fetch_button = gr.Button()
	    viewer = gr.HTML()
	    # Clicking the button passes the text box value to scrape0 and renders
	    # whatever it returns in the HTML pane.
	    fetch_button.click(fn=scrape0, inputs=url_input, outputs=viewer)

	# Enable the request queue with concurrency_count=10, then start the server.
	app.queue(concurrency_count=10)
	app.launch()