Spaces:
Sleeping
Sleeping
File size: 1,107 Bytes
738953f 43a0009 a280e58 43a0009 a280e58 0750144 a280e58 9efc58b a280e58 0750144 43a0009 a280e58 0750144 a280e58 0750144 43a0009 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import html
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
def fetch_pdf_links_and_titles():
    """Collect PDF report links from Naver Finance's company research list.

    Downloads the listing page, selects every ``<a>`` element whose ``href``
    is a PDF under the research upload path, and pairs the anchor text with
    an HTML download link for that PDF.

    Returns:
        list[list[str]]: One ``[title, anchor_html]`` row per matching link,
        in the order the links appear on the page. ``title`` is the stripped
        anchor text; ``anchor_html`` is an ``<a>`` tag whose ``download``
        attribute is the last path segment of the URL.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://finance.naver.com/research/company_list.naver"
    # Bound the wait so a stalled server cannot hang the UI callback forever.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty table.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all PDF links and their titles. Raw string with escaped dots so
    # that "." matches a literal dot and "\." is not an invalid str escape.
    pdf_href = re.compile(
        r"^https://ssl\.pstatic\.net/imgstock/upload/research/company/.*\.pdf$"
    )

    links_and_titles = []
    for link in soup.find_all('a', href=pdf_href):
        # The title is taken from the link text.
        title = link.text.strip()
        full_url = link['href']
        # The href comes from a remote page: escape it before embedding it in
        # single-quoted attributes and in the element text.
        safe_url = html.escape(full_url, quote=True)
        safe_name = html.escape(full_url.split('/')[-1], quote=True)
        # Store the URL as a downloadable link.
        links_and_titles.append(
            [title, f"<a href='{safe_url}' download='{safe_name}'>{safe_url}</a>"]
        )
    return links_and_titles
# Gradio interface: a single button that fills a table with the scraped
# report titles and their PDF download links.
with gr.Blocks() as app:
    btn_fetch = gr.Button("PDF 링크 및 정보 조회")
    # The second column holds an <a> tag, so it is declared as HTML; with the
    # default string datatype the markup would be shown as literal text.
    output_links = gr.Dataframe(
        headers=["Title", "PDF 링크"],
        datatype=["str", "html"],
        interactive=False,
    )
    # No inputs: the callback takes no arguments and returns the table rows.
    btn_fetch.click(
        fn=fetch_pdf_links_and_titles,
        outputs=output_links,
    )

app.launch()
|