seawolf2357 committed on
Commit
9eb2a18
·
verified ·
1 Parent(s): a280e58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -8,14 +8,17 @@ def fetch_pdf_links_and_titles():
8
  response = requests.get(url)
9
  soup = BeautifulSoup(response.text, 'html.parser')
10
 
 
 
11
  # 모든 PDF 링크와 제목을 찾습니다.
12
  pdf_links = soup.find_all('a', href=re.compile("^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"))
13
- links_and_titles = []
14
  for link in pdf_links:
15
  title = link.text.strip() # 링크 텍스트에서 제목 추출
16
  full_url = link['href']
17
- # 다운로드 가능한 링크 형태로 저장
18
- links_and_titles.append([title, f"<a href='{full_url}' download='{full_url.split('/')[-1]}'>{full_url}</a>"])
 
 
19
  return links_and_titles
20
 
21
  # Gradio 인터페이스
 
8
  response = requests.get(url)
9
  soup = BeautifulSoup(response.text, 'html.parser')
10
 
11
+ seen_urls = set()
12
+ links_and_titles = []
13
  # 모든 PDF 링크와 제목을 찾습니다.
14
  pdf_links = soup.find_all('a', href=re.compile("^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"))
 
15
  for link in pdf_links:
16
  title = link.text.strip() # 링크 텍스트에서 제목 추출
17
  full_url = link['href']
18
+ if full_url not in seen_urls:
19
+ seen_urls.add(full_url)
20
+ # 중복 제거 후 다운로드 가능한 링크 형태로 저장
21
+ links_and_titles.append([title, f"<a href='{full_url}' download='{full_url.split('/')[-1]}'>{full_url}</a>"])
22
  return links_and_titles
23
 
24
  # Gradio 인터페이스