import re
from typing import List

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Listing page that is scraped for research-report PDF links.
RESEARCH_LIST_URL = "https://finance.naver.com/research/company_list.naver"

# Matches hrefs that point at a PDF under the research/company upload path.
# Raw string so that ``\.`` is a regex escape rather than an (invalid) string
# escape; compiled once at import time instead of on every button click.
PDF_HREF_PATTERN = re.compile(
    r"^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"
)

# Upper bound (seconds) for the HTTP request so a stalled connection cannot
# block the UI callback forever.
REQUEST_TIMEOUT_SECONDS = 10


def fetch_pdf_links_and_titles() -> List[List[str]]:
    """Fetch the research listing page and collect its PDF links.

    Downloads ``RESEARCH_LIST_URL``, parses it with BeautifulSoup, and picks
    every ``<a>`` element whose ``href`` matches ``PDF_HREF_PATTERN``.

    Returns:
        A list of ``[title, url]`` rows, where ``title`` is the stripped text
        of the anchor and ``url`` is its ``href``. The list is empty when no
        matching link is found.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(RESEARCH_LIST_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty table.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Find every anchor whose href is a PDF link.
    pdf_anchors = soup.find_all("a", href=PDF_HREF_PATTERN)

    # NOTE(review): the title is taken from the anchor's own text; if the
    # anchor wraps only an icon image this will be an empty string — confirm
    # against the live page.
    return [[anchor.text.strip(), anchor["href"]] for anchor in pdf_anchors]


# Gradio interface: one button that fills a read-only table with the results.
with gr.Blocks() as app:
    btn_fetch = gr.Button("PDF 링크 및 정보 조회")
    output_links = gr.Dataframe(headers=["Title", "PDF 링크"], interactive=False)

    btn_fetch.click(
        fn=fetch_pdf_links_and_titles,
        outputs=output_links,
    )

app.launch()