File size: 1,107 Bytes
738953f
43a0009
 
 
 
a280e58
43a0009
 
 
 
a280e58
 
 
0750144
a280e58
 
9efc58b
a280e58
 
0750144
43a0009
 
a280e58
 
0750144
a280e58
0750144
 
43a0009
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import html
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Naver Finance page that lists company research reports.
_RESEARCH_LIST_URL = "https://finance.naver.com/research/company_list.naver"

# Absolute links to research-report PDFs. Raw string with escaped dots so that
# "." only matches a literal dot in the host name and the ".pdf" suffix.
_PDF_HREF_PATTERN = re.compile(
    r"^https://ssl\.pstatic\.net/imgstock/upload/research/company/.*\.pdf$"
)

# Seconds to wait on the HTTP request before giving up instead of hanging.
_REQUEST_TIMEOUT_SECONDS = 10


def fetch_pdf_links_and_titles():
    """Scrape the research list page and collect every report PDF link.

    Returns:
        list[list[str]]: One ``[title, anchor_html]`` row per matching ``<a>``
        tag, where ``title`` is the stripped link text and ``anchor_html`` is
        an ``<a ... download=...>`` element pointing at the PDF.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    response = requests.get(_RESEARCH_LIST_URL, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx rather than parsing an error page into an empty table.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    links_and_titles = []
    # Find every anchor whose href is a research-report PDF.
    for link in soup.find_all('a', href=_PDF_HREF_PATTERN):
        title = link.text.strip()  # the link text is the report title
        raw_url = link['href']
        # The href comes from a scraped page: escape it before embedding it in
        # single-quoted HTML attributes so it cannot break out of the markup.
        safe_url = html.escape(raw_url, quote=True)
        safe_file_name = html.escape(raw_url.rsplit('/', 1)[-1], quote=True)
        # Store the URL as a downloadable anchor element.
        links_and_titles.append(
            [title, f"<a href='{safe_url}' download='{safe_file_name}'>{safe_url}</a>"]
        )
    return links_and_titles

# Gradio interface: a single button that fills a read-only table with the
# scraped report titles and PDF links.
with gr.Blocks() as app:
    # UI strings restored from mis-encoded (mojibake) text to the intended Korean.
    btn_fetch = gr.Button("PDF 링크 및 정보 조회")
    output_links = gr.Dataframe(headers=["Title", "PDF 링크"], interactive=False)
    # The handler takes no inputs; its returned rows populate the table.
    btn_fetch.click(
        fn=fetch_pdf_links_and_titles,
        outputs=output_links
    )

app.launch()