Spaces:
Running
Running
File size: 1,347 Bytes
3919e25 a648a91 3919e25 a648a91 3919e25 3b77cd2 3919e25 13951ed aaca73e 13951ed 7c1d83e 1a8e0e1 a648a91 1a8e0e1 13951ed 3919e25 b9c90b4 13951ed 3919e25 13951ed 8f70505 3f50fbe 8f70505 502b110 3919e25 563ca5d 3919e25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from bs4 import BeautifulSoup as bs
#import html5lib
#import copy
import requests
#from IPython.display import IFrame
def scrape(instring):
    """Fetch a web page and return its prettified HTML for display.

    Args:
        instring: URL of the page to fetch (the value of the input textbox).

    Returns:
        A ``gr.HTML.update`` payload. On success it carries the page markup
        parsed with BeautifulSoup's ``html.parser`` and re-indented by
        ``prettify()``. If the URL is empty or the request fails, it carries
        a short error message instead.
    """
    from html import escape

    url = str(instring or "").strip()
    if not url:
        return gr.HTML.update("<p>Please enter a URL.</p>")

    try:
        # requests has no default timeout; without one a stalled server
        # would block this worker forever.
        page = requests.get(url, timeout=30)
    except requests.RequestException as err:
        # Covers a missing scheme, an invalid URL, DNS/connection failures
        # and timeouts. The message can echo the user-supplied URL, so it is
        # escaped before being placed into HTML.
        return gr.HTML.update(f"<p>Could not fetch page: {escape(str(err))}</p>")

    # Parse the raw bytes so BeautifulSoup can detect the encoding itself.
    soup = bs(page.content, "html.parser")
    return gr.HTML.update(soup.prettify())
def scrape0(instring):
    """Download a URL, dump the raw bytes to disk and return the parsed page.

    Args:
        instring: URL to download.

    Returns:
        A ``gr.HTML.update`` payload containing the response body parsed with
        BeautifulSoup's ``html.parser`` and converted back to a string.

    Side effects:
        Overwrites ``/tmp/metadata.pdf`` with the raw response bytes.
    """
    url = str(instring)
    # A single request feeds both the file dump and the parse, so the page is
    # not downloaded twice and no streamed connection is left open. The
    # timeout keeps a stalled server from blocking the worker indefinitely.
    response = requests.get(url, timeout=30)

    with open('/tmp/metadata.pdf', 'wb') as fd:
        fd.write(response.content)

    soup = bs(response.text, "html.parser")
    return gr.HTML.update(f'''{soup}''')
# Build the UI: a textbox for the URL, a button to trigger the fetch, and an
# HTML pane that renders the result.
with gr.Blocks() as app:
    inp = gr.Textbox()
    go_btn = gr.Button()
    outp = gr.HTML()
    # Clicking the button passes the textbox value to scrape() and writes its
    # return value into the HTML pane.
    go_btn.click(scrape, inp, outp)

# Enable the request queue, then start the server.
app.queue(concurrency_count=10).launch()