Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,26 +3,27 @@ import requests
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
import re
|
5 |
|
6 |
-
def
|
7 |
url = "https://finance.naver.com/research/company_list.naver"
|
8 |
response = requests.get(url)
|
9 |
soup = BeautifulSoup(response.text, 'html.parser')
|
10 |
|
11 |
-
# ๋ชจ๋ PDF
|
12 |
-
pdf_links = soup.find_all('a', href=re.compile("
|
13 |
-
|
14 |
for link in pdf_links:
|
15 |
-
|
|
|
16 |
# ๋ค์ด๋ก๋ ๊ฐ๋ฅํ ๋งํฌ ํํ๋ก ์ ์ฅ
|
17 |
-
|
18 |
-
return
|
19 |
|
20 |
# Gradio ์ธํฐํ์ด์ค
|
21 |
with gr.Blocks() as app:
|
22 |
-
btn_fetch = gr.Button("PDF ๋งํฌ ์กฐํ")
|
23 |
-
output_links = gr.Dataframe(headers=["PDF ๋งํฌ"], interactive=False)
|
24 |
btn_fetch.click(
|
25 |
-
fn=
|
26 |
outputs=output_links
|
27 |
)
|
28 |
|
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
import re
|
5 |
|
6 |
+
def fetch_pdf_links_and_titles():
    """Scrape Naver Finance's company-research list page for report PDFs.

    Fetches https://finance.naver.com/research/company_list.naver, finds every
    anchor whose href is a research-report PDF on ssl.pstatic.net, and returns
    rows suitable for a Gradio Dataframe.

    Returns:
        list[list[str]]: one ``[title, anchor_html]`` row per report, where
        ``anchor_html`` is a downloadable ``<a>`` tag pointing at the PDF.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = "https://finance.naver.com/research/company_list.naver"
    # Timeout so a stalled server cannot hang the UI indefinitely
    # (requests.get without a timeout waits forever by default).
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Match only report PDFs hosted under the research/company upload path.
    # Raw string: "\." in a plain string is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+); r"..." keeps the same pattern bytes.
    pdf_pattern = re.compile(
        r"^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"
    )
    pdf_links = soup.find_all('a', href=pdf_pattern)

    links_and_titles = []
    for link in pdf_links:
        title = link.text.strip()  # anchor text carries the report title
        full_url = link['href']
        # Store as a download-ready HTML link; the download filename is the
        # last URL path segment. full_url is regex-constrained above, so it
        # cannot break out of the attribute quoting.
        links_and_titles.append(
            [title,
             f"<a href='{full_url}' download='{full_url.split('/')[-1]}'>{full_url}</a>"]
        )
    return links_and_titles
20 |
|
21 |
# Gradio interface: a single button that fills a dataframe with the scraped
# PDF titles and download links.
# NOTE(review): `gr` (gradio) must be imported earlier in the file (outside
# this view) — confirm the top-of-file imports include it.
with gr.Blocks() as app:
    # Button label / headers below are Korean UI strings (shown here as the
    # source dump renders them); kept byte-identical — they are runtime text.
    btn_fetch = gr.Button("PDF ๋งํฌ ๋ฐ ์ ๋ณด ์กฐํ")
    # Two columns matching the [title, anchor_html] rows returned by
    # fetch_pdf_links_and_titles; read-only for the user.
    output_links = gr.Dataframe(headers=["Title", "PDF ๋งํฌ"], interactive=False)
    # Wire the click: no inputs, scraper output goes straight to the table.
    btn_fetch.click(
        fn=fetch_pdf_links_and_titles,
        outputs=output_links
    )
|
29 |
|