Update app.py
app.py CHANGED
@@ -1,36 +1,27 @@
 import gradio as gr
+import re
 import requests
 from bs4 import BeautifulSoup
-import re
 
-def
-
-
-
+def extract_pdf_links(url):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    pdf_links = []
+    for link in soup.find_all('a', href=True):
+        if re.search(r'\.pdf', link['href']):
+            pdf_links.append(link['href'])
+
+    return pdf_links[:10]
 
-
-
-
-
-
-            full_url = link['href']
-            if full_url not in seen_urls:
-                seen_urls.add(full_url)
-                # Append the link as an HTML string, including the title and URL
-                links_html += f"<p><a href='{full_url}' download='{full_url.split('/')[-1]}'>{title}</a></p>"
-        links_html += "</div>"
-        return links_html if links_html else "No PDF links found."
-    except Exception as e:
-        return f"An error occurred: {str(e)}"
+def generate_html(pdf_links):
+    html = ""
+    for link in pdf_links:
+        html += f'<a href="{link}" target="_blank">{link}</a><br/>'
+    return html
 
-
-
-
-output_html = gr.HTML()
-btn_fetch.click(
-    fn=fetch_pdf_links_and_titles,
-    outputs=output_html
-)
+iface = gr.Interface(extract_pdf_links,
+                     inputs="text",
+                     outputs="text")
 
-
+iface.launch()
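
Note that the updated app.py defines generate_html but never calls it: gr.Interface is built around extract_pdf_links alone, with outputs="text", so the app returns the raw list of URLs rather than clickable links. Below is a minimal sketch of how the two functions could be composed, assuming clickable output is the intent; the fetch_and_render glue function, the request timeout, and the "html" output type are assumptions on top of this commit, not part of it.

import gradio as gr
import re
import requests
from bs4 import BeautifulSoup

def extract_pdf_links(url):
    # Fetch the page and collect hrefs containing ".pdf" (capped at 10)
    response = requests.get(url, timeout=10)  # timeout is an assumption, not in the commit
    soup = BeautifulSoup(response.text, 'html.parser')

    pdf_links = []
    for link in soup.find_all('a', href=True):
        if re.search(r'\.pdf', link['href']):
            pdf_links.append(link['href'])

    return pdf_links[:10]

def generate_html(pdf_links):
    # Render each URL as an anchor tag, one per line
    html = ""
    for link in pdf_links:
        html += f'<a href="{link}" target="_blank">{link}</a><br/>'
    return html

def fetch_and_render(url):
    # Hypothetical glue: pipe the scraped links into the HTML renderer
    return generate_html(extract_pdf_links(url))

iface = gr.Interface(fetch_and_render,
                     inputs="text",
                     outputs="html")  # "html" output renders the anchors as clickable links

iface.launch()

One caveat: relative hrefs (e.g. /files/doc.pdf) would need to be resolved against the page URL, for instance with urllib.parse.urljoin, before they work as links.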