Spaces:

Omnibus
/

pdf-reader

Running

File size: 1,347 Bytes

3919e25
a648a91
3919e25
 
 
a648a91
3919e25
 
3b77cd2
 
 
 
 
 
 
 
 
 
 
 
 
3919e25
13951ed
aaca73e
13951ed
7c1d83e
1a8e0e1
a648a91
1a8e0e1
13951ed
 
 
 
3919e25
b9c90b4
13951ed
3919e25
 
 
 
13951ed
8f70505
3f50fbe
8f70505
502b110
3919e25
 
563ca5d
3919e25

import gradio as gr
from bs4 import BeautifulSoup as bs 
#import html5lib
#import copy
import requests 
#from IPython.display import IFrame

def scrape(instring, *, timeout=30):
    """Fetch a web page and return it as pretty-printed HTML for display.

    Args:
        instring: URL of the page to download. It is converted to ``str``
            and stripped of surrounding whitespace before the request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``gr.HTML.update`` payload whose value is the page markup
        re-indented by BeautifulSoup's ``prettify``.

    Raises:
        requests.exceptions.RequestException: If the URL is malformed, the
            connection fails, or the server does not answer within
            *timeout*.
    """
    # Pasted URLs often carry stray whitespace or a trailing newline.
    url = str(instring).strip()

    # Without a timeout, requests waits indefinitely on an unresponsive
    # host, which would block this handler for as long as the host stalls.
    page = requests.get(url, timeout=timeout)

    # Parse the raw bytes with the built-in "html.parser" backend, then
    # re-indent the markup for display.
    soup = bs(page.content, "html.parser")
    return gr.HTML.update(soup.prettify())
def scrape0(instring, *, timeout=30):
    """Download a URL once, save the raw bytes to disk, and return its markup.

    The response body is written verbatim to ``/tmp/metadata.pdf`` and the
    decoded text is parsed with BeautifulSoup's ``html.parser``.

    Args:
        instring: URL to download. It is converted to ``str`` and stripped
            of surrounding whitespace before the request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``gr.HTML.update`` payload whose value is the parsed document
        serialized back to a string.

    Raises:
        requests.exceptions.RequestException: If the URL is malformed, the
            connection fails, or the server does not answer within
            *timeout*.
        OSError: If ``/tmp/metadata.pdf`` cannot be written.
    """
    url = str(instring).strip()

    # A single non-streamed request serves both the file copy and the parse.
    # Reading ``.content`` after a streamed body has been iterated raises,
    # so the body is fetched once and reused instead.
    response = requests.get(url, timeout=timeout)

    with open('/tmp/metadata.pdf', 'wb') as fd:
        fd.write(response.content)

    soup = bs(response.text, "html.parser")
    return gr.HTML.update(str(soup))

# Build the UI: a textbox for the URL, a trigger button, and an HTML pane
# that shows the result.
with gr.Blocks() as app:
    inp = gr.Textbox()
    go_btn = gr.Button()
    outp = gr.HTML()
    # Clicking the button passes the textbox value to scrape() and renders
    # whatever it returns in the HTML pane.
    go_btn.click(fn=scrape, inputs=inp, outputs=outp)

# Turn on request queuing with concurrency_count=10, then start the server.
app.queue(concurrency_count=10)
app.launch()