springwater committed on
Commit
7327597
·
verified ·
1 Parent(s): b1a3ea2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -15
app.py CHANGED
@@ -3,36 +3,32 @@ import re
3
  import requests
4
  from bs4 import BeautifulSoup
5
 
6
- def extract_pdf_links_and_title(url):
7
  response = requests.get(url)
8
  soup = BeautifulSoup(response.text, 'html.parser')
9
 
10
- # 페이지 제목 추출
11
- page_title = soup.title.text if soup.title else "No title found"
12
-
13
  pdf_links = []
14
  for link in soup.find_all('a', href=True):
15
  if re.search(r'\.pdf', link['href']):
16
  pdf_links.append(link['href'])
17
 
18
- # PDF 링크와 페이지 제목을 반환
19
- return pdf_links[:100], page_title
20
-
21
- def generate_html(pdf_links_and_title):
22
- pdf_links = pdf_links_and_title[0] # PDF 링크 리스트
23
- page_title = pdf_links_and_title[1] # 페이지 제목
24
 
25
- html = f"<h1>{page_title}</h1>" # 제목을 HTML에 추가
 
26
  for link in pdf_links:
27
  html += f'<a href="{link}" target="_blank" download>{link}</a><br/>'
28
-
29
  return html
30
 
31
- title = "네이버 증권 리서치 링크- https://finance.naver.com/research/company_list.naver"
 
 
 
 
32
 
33
- iface = gr.Interface(fn=extract_pdf_links_and_title,
34
  inputs="text",
35
- outputs=["text", "html"],
36
  title=title)
37
 
38
  iface.launch()
 
3
  import requests
4
  from bs4 import BeautifulSoup
5
 
6
def extract_pdf_links(url):
    """Fetch a web page and return the PDF links it contains.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of at most 100 links, in document order, taken from every
        ``<a href>`` whose href contains ".pdf". Relative hrefs are resolved
        to absolute URLs against the address of the fetched page.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request times out.
    """
    from urllib.parse import urljoin

    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Resolve against the final (post-redirect) page URL so relative links
    # stay valid when rendered outside the source site; absolute hrefs pass
    # through unchanged.
    pdf_links = [
        urljoin(response.url, anchor['href'])
        for anchor in soup.find_all('a', href=True)
        if re.search(r'\.pdf', anchor['href'])
    ]

    return pdf_links[:100]
 
 
 
 
 
16
 
17
def generate_html(pdf_links):
    """Render PDF links as an HTML fragment of download anchors.

    Args:
        pdf_links: Iterable of link strings.

    Returns:
        One ``<a ... download>`` element followed by ``<br/>`` per link,
        concatenated in input order; an empty string when there are no links.
    """
    from html import escape

    # The links come from a scraped page, so escape them before embedding:
    # a raw quote, ampersand, or angle bracket would otherwise break out of
    # the href attribute or inject markup into the rendered output.
    return "".join(
        f'<a href="{safe}" target="_blank" download>{safe}</a><br/>'
        for safe in map(escape, pdf_links)
    )
22
 
23
def extract_and_download(url):
    """Build the HTML list of PDF download links found on a page.

    Args:
        url: Address of the page to scan.

    Returns:
        The HTML fragment produced by ``generate_html`` for the links that
        ``extract_pdf_links`` finds at ``url``.
    """
    return generate_html(extract_pdf_links(url))
26
+
27
# Title passed to the Gradio interface.
title = "네이버 증권 리서치 링크- https://finance.naver.com/research/company_list.naver"

# One text input (the page URL) mapped to one HTML output (the link list).
iface = gr.Interface(
    fn=extract_and_download,
    inputs="text",
    outputs="html",
    title=title,
)

iface.launch()