File size: 1,347 Bytes
3919e25
a648a91
3919e25
 
 
a648a91
3919e25
 
3b77cd2
 
 
 
 
 
 
 
 
 
 
 
 
3919e25
13951ed
aaca73e
13951ed
7c1d83e
1a8e0e1
a648a91
1a8e0e1
13951ed
 
 
 
3919e25
b9c90b4
13951ed
3919e25
 
 
 
13951ed
8f70505
3f50fbe
8f70505
502b110
3919e25
 
563ca5d
3919e25
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
from bs4 import BeautifulSoup as bs 
#import html5lib
#import copy
import requests 
#from IPython.display import IFrame

def scrape(instring, timeout=10):
    """Fetch a web page and return its prettified HTML for a ``gr.HTML`` output.

    Args:
        instring: URL of the page to fetch, as typed by the user.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may wait indefinitely on a silent host.

    Returns:
        A ``gr.HTML.update`` payload holding the page markup re-indented by
        BeautifulSoup, or a short escaped error message when the URL is
        blank or the request fails.
    """
    from html import escape  # local import: only the error path needs it

    url = str(instring or "").strip()
    if not url:
        return gr.HTML.update("<p>Please enter a URL.</p>")

    try:
        page = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Covers missing/invalid schemes, connection failures and timeouts.
        # Escape both values: they originate from user input.
        return gr.HTML.update(
            f"<p>Could not fetch {escape(url)}: {escape(str(exc))}</p>"
        )

    # Hand the raw bytes to the parser, exactly as the response delivered them.
    soup = bs(page.content, "html.parser")
    return gr.HTML.update(soup.prettify())
def scrape0(instring, timeout=10):
    """Fetch a URL, save the raw response to disk and return the parsed page.

    The response body is written unchanged to ``/tmp/metadata.pdf`` and the
    decoded text is parsed with BeautifulSoup for display. A single request
    serves both purposes.

    Args:
        instring: URL to fetch, as typed by the user.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may wait indefinitely on a silent host.

    Returns:
        A ``gr.HTML.update`` payload holding the parsed page markup, or a
        short escaped error message when the URL is blank or the request
        fails.
    """
    from html import escape  # local import: only the error path needs it

    url = str(instring or "").strip()
    if not url:
        return gr.HTML.update("<p>Please enter a URL.</p>")

    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Covers missing/invalid schemes, connection failures and timeouts.
        # Escape both values: they originate from user input.
        return gr.HTML.update(
            f"<p>Could not fetch {escape(url)}: {escape(str(exc))}</p>"
        )

    # Keep a byte-for-byte copy of the response body on disk.
    with open('/tmp/metadata.pdf', 'wb') as fd:
        fd.write(r.content)

    # Parse the decoded text of the same response instead of downloading
    # the URL a second time.
    soup = bs(r.text, "html.parser")

    return gr.HTML.update(f'''{soup}''')

# Assemble the UI: a text box for the URL, a trigger button and an HTML pane
# that shows whatever scrape() returns.
with gr.Blocks() as app:
    inp = gr.Textbox()
    go_btn = gr.Button()
    outp = gr.HTML()
    # Clicking the button passes the text box value to scrape() and routes
    # the returned update into the HTML pane.
    go_btn.click(fn=scrape, inputs=inp, outputs=outp)

# Enable the request queue (concurrency_count=10), then start the app.
app.queue(concurrency_count=10)
app.launch()