Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -30,12 +30,17 @@ def download_html_and_files(url, subdir):
|
|
30 |
html_content = requests.get(url).text
|
31 |
soup = BeautifulSoup(html_content, 'html.parser')
|
32 |
base_url = urllib.parse.urlunparse(urllib.parse.urlparse(url)._replace(path='', params='', query='', fragment=''))
|
|
|
33 |
for link in soup.find_all('a'):
|
34 |
file_url = urllib.parse.urljoin(base_url, link.get('href'))
|
35 |
local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
|
36 |
-
|
|
|
|
|
37 |
link['href'] = local_filename
|
38 |
download_file(file_url, local_filename)
|
|
|
|
|
39 |
with open(os.path.join(subdir, "index.html"), "w") as file:
|
40 |
file.write(str(soup))
|
41 |
|
|
|
30 |
html_content = requests.get(url).text
|
31 |
soup = BeautifulSoup(html_content, 'html.parser')
|
32 |
base_url = urllib.parse.urlunparse(urllib.parse.urlparse(url)._replace(path='', params='', query='', fragment=''))
|
33 |
+
|
34 |
for link in soup.find_all('a'):
|
35 |
file_url = urllib.parse.urljoin(base_url, link.get('href'))
|
36 |
local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
|
37 |
+
|
38 |
+
# Skip if the local filename is a directory
|
39 |
+
if not local_filename.endswith('/') and local_filename != subdir:
|
40 |
link['href'] = local_filename
|
41 |
download_file(file_url, local_filename)
|
42 |
+
|
43 |
+
# Save the modified HTML content
|
44 |
with open(os.path.join(subdir, "index.html"), "w") as file:
|
45 |
file.write(str(soup))
|
46 |
|