Spaces:
Running
Running
File size: 1,417 Bytes
3919e25 a648a91 3919e25 a648a91 3919e25 3b77cd2 3ff2217 3b77cd2 3919e25 13951ed aaca73e 13951ed 7c1d83e 1a8e0e1 a648a91 1a8e0e1 13951ed 3919e25 b9c90b4 13951ed 3919e25 13951ed 8f70505 3f50fbe 8f70505 502b110 3919e25 563ca5d 3919e25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from bs4 import BeautifulSoup as bs
#import html5lib
#import copy
import requests
#from IPython.display import IFrame
def scrape(instring):
    """Return an HTML update that embeds the document at *instring* as a PDF.

    The URL is requested once before any markup is produced, so a malformed
    or unreachable address surfaces as a ``requests`` exception instead of a
    silently broken embed. The downloaded body itself is not used.

    Args:
        instring: URL of the document to embed (taken from the textbox).

    Returns:
        The result of ``gr.HTML.update`` carrying a single ``<object>``
        element of type ``application/pdf`` whose ``data`` attribute is the
        HTML-escaped URL.

    Raises:
        requests.exceptions.RequestException: if the URL is invalid, the
            connection fails, or the server does not answer within the
            timeout.
    """
    import html

    request_timeout = 30  # seconds; keeps a stalled server from blocking a queue worker

    url = str(instring)
    # Fetch only to confirm the address is reachable; the response body is discarded.
    requests.get(url, timeout=request_timeout).close()

    # The URL is user input: escape and quote it so spaces, quotes or angle
    # brackets cannot break out of the attribute and inject markup.
    safe_url = html.escape(url, quote=True)
    return gr.HTML.update(
        f'<object data="{safe_url}" type="application/pdf" '
        f'width="100%" height="500px"></object>'
    )
def scrape0(instring):
    """Download *instring*, save the raw bytes to disk and return the page as HTML.

    A single request serves both purposes: the response bytes are written to
    ``/tmp/metadata.pdf`` and the decoded text is parsed with BeautifulSoup
    and handed back to the HTML component.

    Args:
        instring: URL of the resource to download.

    Returns:
        The result of ``gr.HTML.update`` carrying the string form of the
        parsed document.

    Raises:
        requests.exceptions.RequestException: if the URL is invalid, the
            connection fails, or the server does not answer within the
            timeout.
        OSError: if ``/tmp/metadata.pdf`` cannot be written.
    """
    request_timeout = 30  # seconds; keeps a stalled server from blocking a queue worker

    url = str(instring)
    # One non-streamed request: the body is needed in full anyway (bytes for
    # the file, text for the parser), and the context manager guarantees the
    # connection is released.
    with requests.get(url, timeout=request_timeout) as response:
        raw_bytes = response.content
        html_content = response.text

    with open('/tmp/metadata.pdf', 'wb') as fd:
        fd.write(raw_bytes)

    soup = bs(html_content, "html.parser")
    return gr.HTML.update(f'{soup}')
# Assemble the UI: a URL textbox, a trigger button, and an HTML pane for the result.
with gr.Blocks() as app:
    inp = gr.Textbox()
    go_btn = gr.Button()
    outp = gr.HTML()
    # Clicking the button passes the textbox value to scrape() and renders
    # whatever it returns into the HTML pane.
    go_btn.click(fn=scrape, inputs=inp, outputs=outp)

# Turn on the request queue with 10 concurrent workers, then start the server.
app.queue(concurrency_count=10)
app.launch()