Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,14 +8,17 @@ def fetch_pdf_links_and_titles():
|
|
8 |
response = requests.get(url)
|
9 |
soup = BeautifulSoup(response.text, 'html.parser')
|
10 |
|
|
|
|
|
11 |
# Find all PDF links and their titles.
|
12 |
pdf_links = soup.find_all('a', href=re.compile("^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"))
|
13 |
-
links_and_titles = []
|
14 |
for link in pdf_links:
|
15 |
title = link.text.strip() # Extract the title from the link text
|
16 |
full_url = link['href']
|
17 |
-
|
18 |
-
|
|
|
|
|
19 |
return links_and_titles
|
20 |
|
21 |
# Gradio interface
|
|
|
8 |
response = requests.get(url)
|
9 |
soup = BeautifulSoup(response.text, 'html.parser')
|
10 |
|
11 |
+
seen_urls = set()
|
12 |
+
links_and_titles = []
|
13 |
# Find all PDF links and their titles.
|
14 |
pdf_links = soup.find_all('a', href=re.compile("^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"))
|
|
|
15 |
for link in pdf_links:
|
16 |
title = link.text.strip() # Extract the title from the link text
|
17 |
full_url = link['href']
|
18 |
+
if full_url not in seen_urls:
|
19 |
+
seen_urls.add(full_url)
|
20 |
+
# After removing duplicates, store each entry as a downloadable link
|
21 |
+
links_and_titles.append([title, f"<a href='{full_url}' download='{full_url.split('/')[-1]}'>{full_url}</a>"])
|
22 |
return links_and_titles
|
23 |
|
24 |
# Gradio interface
|