awacke1 committed
Commit 66c145a · Parent(s): f8f0382

Update app.py

Files changed (1): app.py (+39, -30)
app.py CHANGED
@@ -6,12 +6,10 @@ import base64
 from bs4 import BeautifulSoup
 import hashlib
 import json
-import mimetypes
-import shutil
-from zipfile import ZipFile
 
-EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md', '.gitattributes', "backup.py", "Dockerfile"]
+EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md','.gitattributes', "backup.py","Dockerfile"]
 
+# Create a history.json file if it doesn't exist yet
 if not os.path.exists("history.json"):
     with open("history.json", "w") as f:
         json.dump({}, f)
@@ -37,10 +35,12 @@ def download_html_and_files(url, subdir):
         file_url = urllib.parse.urljoin(base_url, link.get('href'))
         local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
 
+        # Skip if the local filename is a directory
         if not local_filename.endswith('/') and local_filename != subdir:
             link['href'] = local_filename
             download_file(file_url, local_filename)
 
+    # Save the modified HTML content
     with open(os.path.join(subdir, "index.html"), "w") as file:
         file.write(str(soup))
 
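The hunk above rewrites each scraped href to a local path and hands the actual fetch to download_file, which is defined outside this diff. The endswith('/') guard is needed because a URL path ending in a slash leaves an empty basename, so local_filename would name the subdirectory itself. A minimal sketch of what the missing helper presumably looks like, assuming the app fetches over HTTP with requests (illustrative only, not the committed implementation):

import os
import requests  # assumption: the real helper may use urllib instead

def download_file(url, local_filename):
    # Sketch: fetch `url` and write the bytes into the per-URL subdirectory.
    os.makedirs(os.path.dirname(local_filename) or ".", exist_ok=True)
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()  # surface HTTP errors instead of saving error pages
    with open(local_filename, "wb") as f:
        f.write(resp.content)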
@@ -48,6 +48,36 @@ def list_files(directory_path='.'):
     files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
     return [f for f in files if f not in EXCLUDED_FILES]
 
+
+def show_file_operations(file_path):
+    st.write(f"File: {os.path.basename(file_path)}")
+
+    # Edit button
+    if st.button(f"✏️ Edit {os.path.basename(file_path)}"):
+        with open(file_path, "r") as f:
+            file_content = f.read()
+        file_content = st.text_area("Edit the file content:", value=file_content, height=250)
+        if st.button(f"💾 Save {os.path.basename(file_path)}"):
+            with open(file_path, "w") as f:
+                f.write(file_content)
+            st.success(f"File {os.path.basename(file_path)} saved!")
+
+    # Delete button
+    if st.button(f"🗑️ Delete {os.path.basename(file_path)}"):
+        os.remove(file_path)
+        st.markdown(f"🎉 File {os.path.basename(file_path)} deleted!")
+
+def show_download_links(subdir):
+    st.write(f'Files for {subdir}:')
+    for file in list_files(subdir):
+        file_path = os.path.join(subdir, file)
+        if os.path.isfile(file_path):
+            st.markdown(get_download_link(file_path), unsafe_allow_html=True)
+            show_file_operations(file_path)
+        else:
+            st.write(f"File not found: {file}")
+
+
 def get_download_link(file):
     with open(file, "rb") as f:
         bytes = f.read()
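A caveat on show_file_operations as committed: st.button returns True only on the single rerun triggered by its own click, and the 💾 Save button is created inside the ✏️ Edit branch, so on the rerun in which Save is pressed the Edit branch no longer executes and the save never happens. A sketch of one way to keep the edit flow alive across reruns with st.session_state; the keys and structure here are hypothetical, not part of this commit:

import os
import streamlit as st

def show_file_operations(file_path):
    # Hypothetical rework: persist "edit mode" in session state so the
    # Save button still renders on the rerun caused by clicking it.
    name = os.path.basename(file_path)
    edit_key = f"editing_{file_path}"  # assumed per-file state key

    if st.button(f"✏️ Edit {name}", key=f"edit_{file_path}"):
        st.session_state[edit_key] = True

    if st.session_state.get(edit_key):
        with open(file_path, "r") as f:
            current = f.read()
        content = st.text_area("Edit the file content:", value=current,
                               height=250, key=f"ta_{file_path}")
        if st.button(f"💾 Save {name}", key=f"save_{file_path}"):
            with open(file_path, "w") as f:
                f.write(content)
            st.session_state[edit_key] = False
            st.success(f"File {name} saved!")

Explicit key= arguments would also avoid duplicate-widget errors when two subdirectories contain files with the same basename, since the committed version identifies each button only by its label.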
@@ -55,35 +85,16 @@ def get_download_link(file):
     href = f'<a href="data:file/octet-stream;base64,{b64}" download=\'{os.path.basename(file)}\'>Click to download {os.path.basename(file)}</a>'
     return href
 
-def delete_all_files():
-    for root, dirs, files in os.walk(".", topdown=False):
-        for name in files:
-            if name not in EXCLUDED_FILES:
-                os.remove(os.path.join(root, name))
-        for name in dirs:
-            shutil.rmtree(os.path.join(root, name))
-    st.success("All files and folders deleted successfully!")
-
-def create_zip_and_get_link():
-    zip_filename = "all_files.zip"
-    with ZipFile(zip_filename, 'w') as zipf:
-        for root, dirs, files in os.walk(".", topdown=False):
-            for file in files:
-                if file not in EXCLUDED_FILES and file != zip_filename:
-                    zipf.write(os.path.join(root, file))
-    with open(zip_filename, "rb") as f:
-        bytes = f.read()
-    b64 = base64.b64encode(bytes).decode()
-    href = f'<a href="data:file/zip;base64,{b64}" download=\'{zip_filename}\'>🔽 Download All Files</a>'
-    st.markdown(href, unsafe_allow_html=True)
 
 def main():
     st.sidebar.title('Web Datasets Bulk Downloader')
     url = st.sidebar.text_input('Please enter a Web URL to bulk download text and files')
 
+    # Load history
     with open("history.json", "r") as f:
         history = json.load(f)
 
+    # Save the history of URL entered as a json file
     if url:
         subdir = hashlib.md5(url.encode()).hexdigest()
         if not os.path.exists(subdir):
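The hunk above cuts off right after the os.path.exists check, so the code that fulfills the "# Save the history of URL entered as a json file" comment is not visible in this diff. Using the names from the surrounding context, the elided steps presumably amount to the following (an assumed reconstruction, not the committed code):

import hashlib
import json
import os

url = "https://example.com/datasets/"           # hypothetical user input
subdir = hashlib.md5(url.encode()).hexdigest()  # deterministic folder per URL

if not os.path.exists(subdir):
    os.makedirs(subdir)
    # download_html_and_files(url, subdir)      # assumed: fetch the page + assets

history = {}
if os.path.exists("history.json"):              # app.py creates this file at startup
    with open("history.json") as f:
        history = json.load(f)
history[url] = subdir                           # record url -> subdir
with open("history.json", "w") as f:
    json.dump(history, f)

Hashing the URL with MD5 yields a stable, filesystem-safe directory name, so re-entering the same URL reuses the same download folder instead of creating a new one.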
@@ -101,13 +112,11 @@ def main():
     for subdir in history.values():
         show_download_links(subdir)
 
+    # Display history as markdown
     with st.expander("URL History and Downloaded Files"):
         for url, subdir in history.items():
             st.markdown(f"#### {url}")
             show_download_links(subdir)
 
-    if st.sidebar.button('🗑️ Delete All'):
-        delete_all_files()
-
-    if st.sidebar.button('📦 Download All'):
-        create_zip_and_get_link
+if __name__ == "__main__":
+    main()
 
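The new if __name__ == "__main__" guard makes the entry point explicit; under streamlit run app.py the script executes top to bottom as __main__ on every interaction, so main() is invoked on each rerun. The get_download_link context lines also show why no server route is needed for downloads: the file bytes are base64-encoded straight into a data: URI inside an anchor tag, and the browser decodes them client-side when the link is clicked. A self-contained illustration of that mechanism, with a made-up payload and filename:

import base64

payload = b"hello, world"  # stand-in for the file bytes read by get_download_link
b64 = base64.b64encode(payload).decode()
href = (f'<a href="data:file/octet-stream;base64,{b64}" '
        f'download="hello.txt">Click to download hello.txt</a>')

# Round-trip check: the browser performs the equivalent decode on click.
assert base64.b64decode(b64) == payload
print(href)

The trade-off is that the base64 body is embedded in the rendered page itself, so the HTML passed to st.markdown grows to roughly a third larger than the underlying file.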