|
import html
import re
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
|
|
|
def extract_pdf_links(url):
    """Fetch a web page and return the PDF links found in its anchors.

    The page at ``url`` is downloaded and parsed; every ``<a>`` element
    whose ``href`` contains ``.pdf`` (case-insensitive) is collected.
    Relative hrefs are resolved against ``url`` so that each returned link
    is usable on its own.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of at most 100 link strings, in document order.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the server does not answer within the timeout.
    """
    # Without a timeout requests waits indefinitely on a stalled server,
    # which would block the UI handler that calls this function.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    pdf_links = [
        # urljoin leaves absolute hrefs untouched and resolves relative ones.
        urljoin(url, anchor['href'])
        for anchor in soup.find_all('a', href=True)
        if re.search(r'\.pdf', anchor['href'], re.IGNORECASE)
    ]

    # Cap the result so a link-heavy page does not flood the output.
    return pdf_links[:100]
|
|
|
def generate_html(pdf_links):
    """Render links as a sequence of HTML download anchors.

    Each link becomes ``<a href="..." target="_blank" download>...</a><br/>``.
    The link is HTML-escaped both in the attribute and in the visible text,
    because the values come from scraped pages and must not be able to
    inject markup.

    Args:
        pdf_links: Iterable of link values; each is converted with ``str``.

    Returns:
        The concatenated HTML fragment, or an empty string when there are
        no links.
    """
    anchors = []
    for link in pdf_links:
        # quote=True also escapes quotes so the value cannot break out of
        # the href attribute.
        safe_link = html.escape(str(link), quote=True)
        anchors.append(
            f'<a href="{safe_link}" target="_blank" download>{safe_link}</a><br/>'
        )
    return "".join(anchors)
|
|
|
|
|
# Heading passed to the Gradio interface as its title; it names the page
# (a Naver research listing) together with its URL.
title = "네이버 증권 리서치 링크- https://finance.naver.com/research/company_list.naver"

# Text in, text out: the submitted URL is handed to extract_pdf_links and
# its return value is shown in the text output.
# NOTE(review): generate_html is defined in this file but is not wired in
# here — confirm whether an HTML output was intended.
iface = gr.Interface(
    fn=extract_pdf_links,
    inputs="text",
    outputs="text",
    title=title,
)

# Start the web UI as soon as the module is executed.
iface.launch()