# NOTE: "Spaces: Running" - web-page header text captured along with the source; not part of the program.
import html

import gradio as gr
import requests
from bs4 import BeautifulSoup as bs
from pypdf import PdfReader
#import html5lib
#import copy
#from IPython.display import IFrame
def scrape(instring):
    """Return a Gradio HTML update that embeds a fixed sample PDF.

    Args:
        instring: Ignored; the returned markup does not depend on it.

    Returns:
        The result of ``gr.HTML.update`` called with an ``<object>`` tag
        whose ``data`` attribute points at the W3C dummy PDF.
    """
    # The markup is constant, so it is built once as a plain literal.
    embed_markup = '<object data="https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" width="100%" height="500px"></object>'
    return gr.HTML.update(embed_markup)
def scrape1(instring):
    """Return a Gradio HTML update that embeds the document at *instring*.

    The URL is requested first, so an invalid or unreachable address still
    raises before any markup is produced. The response body itself is not
    used: the browser loads the document directly from the URL.

    Args:
        instring: URL of the document to embed.

    Returns:
        The result of ``gr.HTML.update`` called with an ``<object>`` tag of
        type ``application/pdf`` whose ``data`` attribute is the given URL.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = str(instring)

    # Always bound the wait; without a timeout a stalled server would hang
    # the handler indefinitely.
    requests.get(url, timeout=30)

    # The URL is user input: escape it and quote the attribute so that
    # spaces, quotes or angle brackets cannot break out of the tag.
    safe_url = html.escape(url, quote=True)
    return gr.HTML.update(
        f'<object data="{safe_url}" type="application/pdf" '
        f'width="100%" height="500px"></object>'
    )
def scrape0(instring):
    """Download the resource at *instring* to ``metadata.pdf`` and embed it.

    The response is streamed to disk in fixed-size chunks using a single
    request, and the connection is released when the download finishes.

    Args:
        instring: URL of the document to download.

    Returns:
        The result of ``gr.HTML.update`` called with an ``<object>`` tag of
        type ``application/pdf`` whose ``data`` attribute is ``metadata.pdf``.

    Raises:
        requests.RequestException: If the request fails or times out.
        OSError: If ``metadata.pdf`` cannot be written.
    """
    chunk_size = 2000
    url = f'{instring}'

    # NOTE(review): every call writes the same path, so overlapping calls
    # would overwrite each other's download - confirm this is acceptable.
    # One streamed request is enough; the context manager closes the
    # response even if writing fails part-way through.
    with requests.get(url, stream=True, timeout=30) as response:
        with open('metadata.pdf', 'wb') as fd:
            for chunk in response.iter_content(chunk_size):
                fd.write(chunk)

    # NOTE(review): the tag references the file by relative path; whether the
    # browser can resolve it depends on how the app serves files - confirm.
    return gr.HTML.update(
        '<object data="metadata.pdf" type="application/pdf" '
        'width="100%" height="500px"></object>'
    )
# Build the UI: one text input, one button and one HTML output area.
with gr.Blocks() as app:
    inp = gr.Textbox()
    go_btn = gr.Button()
    outp = gr.HTML()
    # Clicking the button passes the textbox value to scrape0 and renders
    # whatever it returns in the HTML component.
    go_btn.click(fn=scrape0, inputs=inp, outputs=outp)

# Enable the request queue with concurrency_count=10, then start the server.
app.queue(concurrency_count=10)
app.launch()