import gradio as gr
from bs4 import BeautifulSoup as bs
from pypdf import PdfReader  # noqa: F401 -- unused here; kept in case another entry point needs it
import requests


def scrape(instring):
    """Placeholder handler: ignores *instring* and returns an empty HTML update.

    NOTE(review): the f-string payload is empty in the source -- presumably an
    iframe/HTML snippet was stripped out; confirm the intended markup.
    """
    return gr.HTML.update(f'''''')


def scrape1(instring):
    """Fetch *instring* as a URL and pretty-print its HTML.

    The prettified soup is computed but never emitted (kept for parity with
    the original, presumably debugging aid -- TODO confirm intended output);
    the function always returns an empty HTML update.
    """
    url = f'{instring}'
    page = requests.get(url)
    soup = bs(page.content, "html.parser")
    out = str(soup.prettify())  # computed but intentionally not returned
    return gr.HTML.update(f'''''')


def scrape0(instring):
    """Stream the resource at *instring* into a local file ``metadata.pdf``.

    Fixes over the original:
      * the URL was fetched TWICE (once streamed for the file write, once via
        ``.text`` to build a BeautifulSoup that was never used) -- a single
        streamed request now does the download;
      * the dead ``try: out = r.content`` block with its misleading
        ``"No Divs"`` message is replaced by real error handling around the
        download itself, catching only ``requests.RequestException``.

    Always returns an empty ``gr.HTML`` update, matching the original
    interface (the Blocks UI wires this into an HTML output component).
    """
    chunk_size = 2000
    url = f'{instring}'
    try:
        r = requests.get(url, stream=True)
        with open('metadata.pdf', 'wb') as fd:
            for chunk in r.iter_content(chunk_size):
                fd.write(chunk)
    except requests.RequestException as err:
        # Best-effort download: report the failure instead of crashing the UI.
        print(f"Download failed: {err}")
    return gr.HTML.update(f'''''')


# --- Gradio UI: one textbox in, one HTML component out, wired to scrape0 ---
with gr.Blocks() as app:
    inp = gr.Textbox()
    go_btn = gr.Button()
    outp = gr.HTML()
    go_btn.click(scrape0, inp, outp)

app.queue(concurrency_count=10).launch()