urlcrawl / app.py
seawolf2357's picture
Update app.py
43a0009 verified
raw
history blame
843 Bytes
import re
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
def download_first_pdf():
    """Return the URL of the first PDF linked from the Naver Finance research list.

    Fetches the company research listing page, looks for the first ``<a>``
    element whose ``href`` ends in ``.pdf``, and resolves that ``href``
    against the listing page URL.

    Returns:
        str: The absolute PDF URL when a link is found. Otherwise a
        human-readable (Korean) message, either because the page could not
        be fetched or because it contains no matching link. Failures are
        reported as text because the result is shown directly in a textbox.
    """
    url = "https://finance.naver.com/research/company_list.naver"
    try:
        # Without a timeout a stalled connection would block the UI callback
        # indefinitely.
        response = requests.get(url, timeout=10)
        # Do not try to parse an HTTP error page as if it were the listing.
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"페이지를 불러오지 못했습니다: {exc}"

    soup = BeautifulSoup(response.text, 'html.parser')
    # Find the first anchor whose href ends in ".pdf".
    pdf_link = soup.find('a', href=re.compile(r"\.pdf$"))
    if pdf_link is None:
        return "PDF 링크를 찾을 수 없습니다."

    # urljoin leaves an already-absolute href untouched and resolves relative
    # ones against the page URL; blindly prefixing the site origin would
    # produce a malformed URL for absolute or path-relative hrefs.
    return urljoin(url, pdf_link['href'])
# Gradio UI: a heading, a button that triggers the scrape, and a textbox that
# shows whatever string download_first_pdf returns.
with gr.Blocks() as app:
    gr.Markdown(value="### 네이버 금융 리서치 보고서 PDF 다운로더")
    btn_download = gr.Button(value="첫 번째 PDF 다운로드")
    output = gr.Textbox(label="PDF 링크")
    # The handler takes no inputs; its return value is written to `output`.
    btn_download.click(download_first_pdf, None, output)

# Start the web server for the interface defined above.
app.launch()