|
import html
import re

import requests
from bs4 import BeautifulSoup

import gradio as gr
|
|
|
def fetch_pdf_links_and_titles():
    """Scrape Naver Finance's company research page for PDF report links.

    Fetches https://finance.naver.com/research/company_list.naver, collects
    every anchor whose href is a PDF hosted under
    ssl.pstatic.net/imgstock/upload/research/company/, de-duplicates by URL,
    and renders them as downloadable HTML links.

    Returns:
        str: An HTML fragment (``<div>`` of ``<a download>`` links),
        ``"No PDF links found."`` when the page yields none, or an
        ``"An error occurred: ..."`` message on any failure.
    """
    try:
        url = "https://finance.naver.com/research/company_list.naver"
        # Timeout prevents the Gradio callback from hanging indefinitely
        # if the remote server stalls.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # don't silently parse an error page
        soup = BeautifulSoup(response.text, 'html.parser')

        # Raw string avoids the invalid-escape warning the original '\.'
        # produced, and the host-name dots are escaped so they match only
        # a literal '.'.
        pdf_pattern = re.compile(
            r"^https://ssl\.pstatic\.net/imgstock/upload/research/company/.*\.pdf$"
        )
        pdf_links = soup.find_all('a', href=pdf_pattern)

        seen_urls = set()
        items = []
        for link in pdf_links:
            full_url = link['href']
            if full_url in seen_urls:
                continue  # same report can be linked more than once
            seen_urls.add(full_url)
            title = link.text.strip()
            filename = full_url.split('/')[-1]
            # Escape scraped text before embedding it in HTML — the page
            # content is untrusted remote input.
            items.append(
                f"<p><a href='{html.escape(full_url, quote=True)}' "
                f"download='{html.escape(filename, quote=True)}'>"
                f"{html.escape(title)}</a></p>"
            )

        if not items:
            # The original checked the wrapper string, which was always
            # non-empty, so this message was unreachable; check the
            # collected items instead.
            return "No PDF links found."
        return "<div style='margin-top: 20px;'>" + "".join(items) + "</div>"
    except Exception as e:
        # Surface the failure in the UI rather than crashing the callback.
        return f"An error occurred: {str(e)}"
|
|
|
|
|
# Minimal one-button UI: clicking the button runs the scraper and renders
# the resulting HTML fragment below it.
with gr.Blocks() as app:
    fetch_button = gr.Button("PDF 링크 및 정보 조회")
    result_panel = gr.HTML()
    fetch_button.click(fn=fetch_pdf_links_and_titles, outputs=result_panel)

app.launch()
|
|