# Hugging Face Spaces page residue — Space status: Sleeping (not part of the program)
import html
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
def fetch_pdf_links_and_titles():
    """Fetch company research PDF links from Naver Finance and render them as HTML.

    Scrapes https://finance.naver.com/research/company_list.naver for anchors
    whose href points directly at a PDF on Naver's research upload host, and
    builds a fragment of <a download> links (one per unique URL).

    Returns:
        str: The HTML fragment; "No PDF links found." when the page yields no
        matching links; or an "An error occurred: ..." message on any failure.
    """
    try:
        url = "https://finance.naver.com/research/company_list.naver"
        # Timeout so a hung connection cannot block the Gradio UI indefinitely.
        response = requests.get(url, timeout=10)
        # Fail fast on HTTP errors instead of silently parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Only direct PDF links from Naver's research upload host.
        # Raw string + escaped dots: the original pattern let '.' match any char.
        pdf_pattern = re.compile(
            r"^https://ssl\.pstatic\.net/imgstock/upload/research/company/.*\.pdf$"
        )

        seen_urls = set()  # deduplicate repeated links to the same PDF
        parts = ["<div style='margin-top: 20px;'>"]
        for link in soup.find_all('a', href=pdf_pattern):
            full_url = link['href']
            if full_url in seen_urls:
                continue
            seen_urls.add(full_url)
            # Escape the scraped title so markup in it cannot break the HTML.
            title = html.escape(link.text.strip())
            filename = full_url.split('/')[-1]
            parts.append(
                f"<p><a href='{full_url}' download='{filename}'>{title}</a></p>"
            )
        parts.append("</div>")

        # BUG FIX: the original tested `links_html`, which always contained the
        # wrapper <div> and was therefore never falsy — the "No PDF links found."
        # branch was unreachable. Test whether any link actually matched.
        if not seen_urls:
            return "No PDF links found."
        return "".join(parts)
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Gradio interface: one button that fetches and displays the PDF links as HTML.
with gr.Blocks() as app:
    fetch_button = gr.Button("PDF 링크 및 정보 조회")
    links_output = gr.HTML()
    fetch_button.click(fn=fetch_pdf_links_and_titles, outputs=links_output)

app.launch()