Omnibus committed on
Commit
d02b2ab
·
1 Parent(s): 2d1281f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -64
app.py CHANGED
@@ -1,78 +1,16 @@
1
  import gradio as gr
2
- from bs4 import BeautifulSoup as bs
3
- from pypdf import PdfReader
4
- from pathlib import Path
5
- import os
6
- import sys
7
- #import html5lib
8
- #import copy
9
- import requests
10
- #from IPython.display import IFrame
11
 
12
 
13
def scrape(instring):
    """Show the PDF at *instring* in-page via the Google Docs viewer.

    Parameters
    ----------
    instring : str
        Publicly reachable URL of a PDF document.

    Returns
    -------
    A ``gr.HTML`` update whose value is an iframe embedding the hosted
    viewer, so the PDF renders in-browser without downloading it here.
    """
    # The target URL travels inside the viewer's ``url=`` query parameter,
    # so it must be percent-encoded; a raw '&' or '?' in *instring* would
    # otherwise be parsed as part of the viewer URL itself.
    from urllib.parse import quote
    html_src = f'''
    <div style="text-align:center">
    <h4>Pdf viewer testing</h4>
    <iframe src="https://docs.google.com/viewer?url={quote(instring, safe="")}&embedded=true" frameborder="0" height="500px" width="100%"></iframe>
    </div>'''
    return gr.HTML.update(f'''{html_src}''')
20
 
21
 
22
def scrape00(instring):
    """Download the PDF at *instring* to ./data.pdf and embed it in the page.

    The response body is written to ``data.pdf`` only on HTTP 200; on any
    other status the code is printed and whatever ``data.pdf`` already holds
    is embedded (the original best-effort behaviour is preserved).

    Returns a ``gr.HTML`` update containing an ``<embed>`` element that
    points at the downloaded file.
    """
    response = requests.get(instring, stream=True)
    if response.status_code == 200:
        with open("data.pdf", "wb") as f:
            f.write(response.content)
    else:
        # Best-effort: report the failure but still try to show the file.
        print(response.status_code)

    out = Path("./data.pdf")
    print(out)
    # Opening the file fails fast on a missing/corrupt download, exactly as
    # the original did; the unused page-count/text extraction was removed.
    reader = PdfReader("data.pdf")
    # Attribute values are quoted so paths containing spaces still render.
    return gr.HTML.update(
        f'<embed src="{out}" type="application/pdf" width="100%" height="500px" />'
    )
39
-
40
def scrape1(instring):
    """Embed the PDF at *instring* directly with an ``<object>`` tag.

    Unlike ``scrape00``/``scrape0`` nothing is downloaded server-side; the
    browser is pointed straight at the remote document.

    Returns a ``gr.HTML`` update containing the ``<object>`` element.
    """
    # The original also fetched the page and prettified it with
    # BeautifulSoup, but never used the result — that dead network
    # round-trip is removed.
    # Attribute values are quoted and the element is closed so the emitted
    # HTML stays well-formed even when *instring* contains spaces.
    return gr.HTML.update(
        f'<object data="{instring}" type="application/pdf" width="100%" height="500px"></object>'
    )
53
def scrape0(instring):
    """Stream-download *instring* to ./metadata.pdf and embed the local copy.

    The body is written in 2000-byte chunks so large PDFs are never held
    fully in memory.

    Returns a ``gr.HTML`` update with an ``<object>`` element pointing at
    the downloaded file.
    """
    chunk_size = 2000
    url = f'{instring}'
    # Single streamed request; the original issued a second, full download
    # of the same URL only to build a BeautifulSoup object it never used,
    # and wrapped a plain assignment in a try/except that could not fire.
    r = requests.get(url, stream=True)
    with open('metadata.pdf', 'wb') as fd:
        for chunk in r.iter_content(chunk_size):
            fd.write(chunk)
    # Quoted attributes + closing tag keep the generated HTML well-formed.
    return gr.HTML.update(
        '<object data="metadata.pdf" type="application/pdf" width="100%" height="500px"></object>'
    )
76
 
77
  with gr.Blocks() as app:
78
  inp=gr.Textbox()
 
1
  import gradio as gr
2
+
 
 
 
 
 
 
 
 
3
 
4
 
5
def scrape(instring):
    """Show the PDF at *instring* in-page via the Google Docs viewer.

    Parameters
    ----------
    instring : str
        Publicly reachable URL of a PDF document.

    Returns
    -------
    A ``gr.HTML`` update whose value is an iframe embedding the hosted
    viewer (1200px tall, per this revision), so the PDF renders in-browser.
    """
    # The target URL travels inside the viewer's ``url=`` query parameter,
    # so it must be percent-encoded; a raw '&' or '?' in *instring* would
    # otherwise be parsed as part of the viewer URL itself.
    from urllib.parse import quote
    html_src = f'''
    <div style="text-align:center">
    <h4>Pdf viewer testing</h4>
    <iframe src="https://docs.google.com/viewer?url={quote(instring, safe="")}&embedded=true" frameborder="0" height="1200px" width="100%"></iframe>
    </div>'''
    return gr.HTML.update(f'''{html_src}''')
12
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  with gr.Blocks() as app:
16
  inp=gr.Textbox()