Spaces:
Sleeping
Sleeping
File size: 1,293 Bytes
738953f 43a0009 a280e58 7e27e95 6757069 7e27e95 6757069 7e27e95 0750144 43a0009 a280e58 cdbedd5 0750144 a280e58 cdbedd5 0750144 43a0009 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
import re
def fetch_pdf_links_and_titles():
    """Scrape the Naver Finance company-research page and return PDF links as HTML.

    Fetches https://finance.naver.com/research/company_list.naver, collects
    every anchor whose href points to a research-report PDF hosted on
    ssl.pstatic.net, de-duplicates by URL, and renders each as a downloadable
    <a> tag inside a <div> wrapper.

    Returns:
        str: An HTML fragment with one ``<p><a ...></a></p>`` per unique PDF,
        ``"No PDF links found."`` when the page yields no matching anchors,
        or an ``"An error occurred: ..."`` message if the request or parsing
        fails.
    """
    try:
        url = "https://finance.naver.com/research/company_list.naver"
        # Bound the request so a hung server cannot freeze the UI callback.
        response = requests.get(url, timeout=10)
        # Surface HTTP 4xx/5xx as an exception instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        seen_urls = set()
        links_html = "<div style='margin-top: 20px;'>"
        # Raw string avoids the invalid-escape-sequence warning for \. in the pattern.
        pdf_links = soup.find_all('a', href=re.compile(r"^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"))
        for link in pdf_links:
            title = link.text.strip()
            full_url = link['href']
            if full_url not in seen_urls:
                seen_urls.add(full_url)
                # Append the link as an HTML string, including title and URL.
                links_html += f"<p><a href='{full_url}' download='{full_url.split('/')[-1]}'>{title}</a></p>"
        links_html += "</div>"
        # The original `if links_html` was always true (the string starts with
        # the <div> wrapper); test whether any links were actually collected.
        return links_html if seen_urls else "No PDF links found."
    except Exception as e:
        # Broad catch is deliberate: this feeds a Gradio HTML widget, so any
        # failure renders as a message instead of crashing the app.
        return f"An error occurred: {str(e)}"
# Gradio interface: one button that renders the scraped PDF links as HTML.
with gr.Blocks() as app:
    btn_fetch = gr.Button("PDF 링크 및 정보 조회")
    output_html = gr.HTML()
    # Wire the button to the scraper; its HTML string fills the output widget.
    btn_fetch.click(fn=fetch_pdf_links_and_titles, outputs=output_html)

app.launch()
|