seawolf2357 committed on
Commit
a280e58
·
verified ·
1 Parent(s): 9efc58b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -3,26 +3,27 @@ import requests
3
  from bs4 import BeautifulSoup
4
  import re
5
 
6
- def fetch_pdf_links():
7
  url = "https://finance.naver.com/research/company_list.naver"
8
  response = requests.get(url)
9
  soup = BeautifulSoup(response.text, 'html.parser')
10
 
11
- # 모든 PDF 링크를 찾습니다.
12
- pdf_links = soup.find_all('a', href=re.compile("\.pdf$"))
13
- links = []
14
  for link in pdf_links:
15
- full_url = "https://finance.naver.com" + link['href']
 
16
  # 다운로드 가능한 링크 형태로 저장
17
- links.append([f"<a href='{full_url}' download='{full_url.split('/')[-1]}'>{full_url.split('/')[-1]}</a>"])
18
- return links
19
 
20
  # Gradio 인터페이스
21
  with gr.Blocks() as app:
22
- btn_fetch = gr.Button("PDF 링크 조회")
23
- output_links = gr.Dataframe(headers=["PDF 링크"], interactive=False)
24
  btn_fetch.click(
25
- fn=fetch_pdf_links,
26
  outputs=output_links
27
  )
28
 
 
3
  from bs4 import BeautifulSoup
4
  import re
5
 
6
+ def fetch_pdf_links_and_titles():
7
  url = "https://finance.naver.com/research/company_list.naver"
8
  response = requests.get(url)
9
  soup = BeautifulSoup(response.text, 'html.parser')
10
 
11
+ # 모든 PDF 링크와 제목을 찾습니다.
12
+ pdf_links = soup.find_all('a', href=re.compile("^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"))
13
+ links_and_titles = []
14
  for link in pdf_links:
15
+ title = link.text.strip() # 링크 텍스트에서 제목 추출
16
+ full_url = link['href']
17
  # 다운로드 가능한 링크 형태로 저장
18
+ links_and_titles.append([title, f"<a href='{full_url}' download='{full_url.split('/')[-1]}'>{full_url}</a>"])
19
+ return links_and_titles
20
 
21
  # Gradio 인터페이스
22
  with gr.Blocks() as app:
23
+ btn_fetch = gr.Button("PDF 링크 및 정보 조회")
24
+ output_links = gr.Dataframe(headers=["Title", "PDF 링크"], interactive=False)
25
  btn_fetch.click(
26
+ fn=fetch_pdf_links_and_titles,
27
  outputs=output_links
28
  )
29