springwater committed on
Commit
dc39e39
·
verified ·
1 Parent(s): 0290677

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -4
app.py CHANGED
@@ -4,6 +4,10 @@ import requests
4
  from bs4 import BeautifulSoup
5
 
6
  def extract_pdf_links(url):
 
 
 
 
7
  response = requests.get(url)
8
  soup = BeautifulSoup(response.text, 'html.parser')
9
 
@@ -14,18 +18,27 @@ def extract_pdf_links(url):
14
 
15
  return pdf_links[:100]
16
 
 
 
 
 
17
  def generate_html(pdf_links):
18
  html = ""
19
  for link in pdf_links:
20
  html += f'<a href="{link}" target="_blank" download>{link}</a><br/>'
21
  return html
22
 
 
 
 
 
 
23
 
24
- title = "네이버 증권 리서치 링크- https://finance.naver.com/research/company_list.naver"
25
 
26
- iface = gr.Interface(extract_pdf_links,
27
- inputs="text",
28
  outputs="text",
29
  title=title)
30
 
31
- iface.launch()
 
4
  from bs4 import BeautifulSoup
5
 
6
  def extract_pdf_links(url):
7
+ # URL 유효성 검사 추가
8
+ if not re.match(r'http[s]?://', url):
9
+ return ["Invalid URL"]
10
+
11
  response = requests.get(url)
12
  soup = BeautifulSoup(response.text, 'html.parser')
13
 
 
18
 
19
  return pdf_links[:100]
20
 
21
def filter_links_by_keyword(pdf_links, keyword):
    """Return the links that contain *keyword*, ignoring case.

    Args:
        pdf_links: Iterable of link strings to search.
        keyword: Substring to look for. An empty string matches every link.

    Returns:
        A new list holding the matching links in their original order.
    """
    # Lower-case the keyword once instead of once per link.
    needle = keyword.lower()
    return [link for link in pdf_links if needle in link.lower()]
24
+
25
def generate_html(pdf_links):
    """Render links as HTML download anchors, one per line.

    Each link becomes ``<a href=... target="_blank" download>...</a><br/>``.
    The link text is HTML-escaped before interpolation, so characters such
    as ``&``, ``<`` or ``"`` cannot break out of the attribute or inject
    markup into the rendered page.

    Args:
        pdf_links: Iterable of link strings.

    Returns:
        The concatenated HTML fragment; an empty string when there are no
        links.
    """
    import html  # local import keeps this function self-contained

    anchors = []
    for link in pdf_links:
        safe = html.escape(link, quote=True)
        anchors.append(
            f'<a href="{safe}" target="_blank" download>{safe}</a><br/>'
        )
    # Join once instead of growing a string with += inside the loop.
    return "".join(anchors)
30
 
31
def main(url, keyword):
    """Fetch PDF links from *url*, optionally filter them, and render HTML.

    Args:
        url: Page address handed to ``extract_pdf_links``.
        keyword: Optional filter text. Empty, ``None`` or whitespace-only
            values disable filtering.

    Returns:
        The HTML fragment produced by ``generate_html``, or the plain
        message ``"Invalid URL"`` when the URL was rejected.
    """
    pdf_links = extract_pdf_links(url)

    # extract_pdf_links reports a rejected URL as the one-element list
    # ["Invalid URL"]. Return that message directly so it is neither
    # dropped by the keyword filter nor rendered as a download link.
    if pdf_links == ["Invalid URL"]:
        return pdf_links[0]

    # A whitespace-only keyword would otherwise filter out every link.
    keyword = keyword.strip() if keyword else ""
    if keyword:  # filter only when a keyword was actually supplied
        pdf_links = filter_links_by_keyword(pdf_links, keyword)
    return generate_html(pdf_links)
36
 
37
# Title shown at the top of the Gradio page.
title = "네이버 증권 리서치 링크 - https://finance.naver.com/research/company_list.naver"

# main() returns an HTML fragment of <a> tags, so the output component must
# be "html"; with "text" the markup is shown verbatim instead of as links.
iface = gr.Interface(main,
                     inputs=["text", "text"],  # URL and keyword inputs
                     outputs="html",
                     title=title)

iface.launch()