Spaces:
Sleeping
Sleeping
import html
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
# Listing page that is scraped for company research reports.
_RESEARCH_LIST_URL = "https://finance.naver.com/research/company_list.naver"

# Only anchors whose href is a PDF under this prefix are collected. Raw string
# with escaped dots so "." in the host name matches a literal dot only.
_PDF_HREF_PATTERN = re.compile(
    r"^https://ssl\.pstatic\.net/imgstock/upload/research/company/.*\.pdf$"
)

# Upper bound (seconds) for the HTTP request so a stalled server cannot hang
# the UI callback indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10


def _build_download_anchor(pdf_url):
    """Return an HTML ``<a>`` tag that downloads *pdf_url* under its file name."""
    # The URL comes from a scraped page, so escape it before embedding it in
    # single-quoted HTML attributes and in the link text.
    safe_url = html.escape(pdf_url, quote=True)
    safe_name = html.escape(pdf_url.rsplit("/", 1)[-1], quote=True)
    return f"<a href='{safe_url}' download='{safe_name}'>{safe_url}</a>"


def fetch_pdf_links_and_titles():
    """Collect report titles and PDF download links from the listing page.

    Requests the research listing page, finds every ``<a>`` element whose
    ``href`` matches the research-PDF URL pattern, and builds one row per
    match.

    Returns:
        A list of ``[title, anchor_html]`` rows, where ``title`` is the
        stripped link text and ``anchor_html`` is an ``<a>`` tag pointing at
        the PDF. The list is empty when no matching link is found.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(_RESEARCH_LIST_URL, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty
    # result.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    return [
        [link.text.strip(), _build_download_anchor(link["href"])]
        for link in soup.find_all("a", href=_PDF_HREF_PATTERN)
    ]
# Gradio interface: a single button that fills a two-column table with the
# rows returned by fetch_pdf_links_and_titles.
with gr.Blocks() as app:
    # Label reads "Look up PDF links and info".
    btn_fetch = gr.Button("PDF 링크 및 정보 조회")
    output_links = gr.Dataframe(
        # Second header reads "PDF link".
        headers=["Title", "PDF 링크"],
        # The second column carries an HTML <a> tag; the "markdown" datatype
        # lets the table render it as a link instead of showing raw markup.
        datatype=["str", "markdown"],
        interactive=False,
    )
    btn_fetch.click(
        fn=fetch_pdf_links_and_titles,
        outputs=output_links,
    )

app.launch()