seawolf2357 committed on
Commit
4012bf8
·
verified ·
1 Parent(s): 6757069

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -29
app.py CHANGED
@@ -1,36 +1,27 @@
1
  import gradio as gr
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
- import re
5
 
6
def fetch_pdf_links_and_titles():
    """Fetch the Naver Finance company research list and render its PDF links.

    Downloads the research list page, collects every anchor whose ``href``
    matches the research PDF upload path, drops repeated URLs (keeping the
    first occurrence), and renders one download link per PDF.

    Returns:
        An HTML string containing one ``<p><a ...>title</a></p>`` entry per
        PDF inside a wrapping ``<div>``; ``"No PDF links found."`` when the
        page has no matching anchors; or ``"An error occurred: ..."`` when
        any step raises.
    """
    from html import escape

    try:
        url = "https://finance.naver.com/research/company_list.naver"
        # A timeout keeps a stalled server from hanging the button callback.
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Raw string so that ``\.`` reaches the regex engine unambiguously.
        pdf_pattern = re.compile(
            r"^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"
        )

        seen_urls = set()
        entries = []
        for link in soup.find_all('a', href=pdf_pattern):
            full_url = link['href']
            if full_url in seen_urls:
                continue
            seen_urls.add(full_url)

            # Title and URL come from a remote page; escape them before
            # placing them in single-quoted attributes and element text.
            title = escape(link.text.strip())
            href = escape(full_url)
            file_name = escape(full_url.split('/')[-1])
            entries.append(
                f"<p><a href='{href}' download='{file_name}'>{title}</a></p>"
            )

        # Checked on the collected entries: the wrapper <div> alone is a
        # non-empty string, so testing the final HTML could never be false.
        if not entries:
            return "No PDF links found."
        return "<div style='margin-top: 20px;'>" + "".join(entries) + "</div>"
    except Exception as e:
        # UI boundary: report the failure in the output pane, do not raise.
        return f"An error occurred: {str(e)}"
26
 
27
# Gradio interface: a single button whose click fills the HTML pane below it
# with the result of fetch_pdf_links_and_titles.
with gr.Blocks() as app:
    btn_fetch = gr.Button("PDF 링크 및 정보 조회")
    output_html = gr.HTML()
    # The callback takes no inputs; its return value is shown in output_html.
    btn_fetch.click(fn=fetch_pdf_links_and_titles, inputs=None, outputs=output_html)

app.launch()
 
1
  import gradio as gr
2
+ import re
3
  import requests
4
  from bs4 import BeautifulSoup
 
5
 
6
def extract_pdf_links(url):
    """Return up to ten PDF links found on the page at *url*.

    Args:
        url: Address of the HTML page to scan.

    Returns:
        A list of at most 10 URLs, in document order, taken from the
        ``href`` of every ``<a>`` tag whose ``href`` contains ``.pdf``.
        Relative hrefs are resolved against *url*.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    from urllib.parse import urljoin

    # A timeout keeps a stalled server from hanging the caller indefinitely.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx; otherwise an error page is scraped as if valid.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    pdf_links = [
        # Resolve relative hrefs so the links stay usable outside the page.
        urljoin(url, anchor['href'])
        for anchor in soup.find_all('a', href=True)
        if '.pdf' in anchor['href']
    ]
    return pdf_links[:10]
16
 
17
def generate_html(pdf_links):
    """Render *pdf_links* as a sequence of HTML anchors.

    Args:
        pdf_links: Iterable of URL strings.

    Returns:
        A string with one ``<a href="..." target="_blank">...</a><br/>``
        element per link, in input order; an empty string when there are
        no links. Each link is HTML-escaped in both the attribute and the
        visible text.
    """
    from html import escape

    parts = []
    for link in pdf_links:
        # Links are scraped from arbitrary pages; escaping prevents a quote
        # or angle bracket in an href from breaking out of the markup.
        safe_link = escape(link)
        parts.append(f'<a href="{safe_link}" target="_blank">{safe_link}</a><br/>')
    return "".join(parts)
 
 
 
 
 
 
 
 
 
22
 
23
def _render_pdf_links(url):
    """Fetch the PDF links found at *url* and render them as HTML anchors."""
    return generate_html(extract_pdf_links(url))


# The scraped links are passed through generate_html and shown in an HTML
# output so each one is clickable; sending the raw list to a "text" output
# would only display its Python repr.
iface = gr.Interface(
    fn=_render_pdf_links,
    inputs="text",
    outputs="html",
)

iface.launch()