awacke1 committed
Commit c0a3424 · verified · 1 Parent(s): f549614

Update app.py

Files changed (1):
  1. app.py +173 -114
app.py CHANGED
@@ -8,6 +8,12 @@ import plotly.express as px
 import zipfile
 import tempfile
 import shutil
+from bs4 import BeautifulSoup
+from PIL import Image
+import glob
+from datetime import datetime
+import pytz
+from urllib.parse import quote
 
 # Initialize the Hugging Face API
 api = HfApi()
@@ -89,126 +95,179 @@ def perform_snapshot_download(repo_id, repo_type):
     except Exception as e:
         return str(e)
 
-st.title("Hugging Face User Page Downloader & Zipper 📄➕📦")
 
-user_input = st.text_area(
-    "Enter Hugging Face usernames (one per line):",
-    value="\n".join(default_users["users"]),
-    height=300
-)
 
-if st.button("Show User Content and Download Snapshots"):
-    if user_input:
-        username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
-
-        user_data_list = asyncio.run(fetch_all_users(username_list))
-
-        stats = {"username": [], "models_count": [], "datasets_count": []}
-        successful_html_files = []
-        snapshot_downloads = []
-
-        st.markdown("### User Content Overview")
-        for user_data in user_data_list:
-            username = user_data["username"]
-            with st.container():
-                st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
-
-                if "error" in user_data:
-                    st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
-                else:
-                    models = user_data["models"]
-                    datasets = user_data["datasets"]
 
-                    html_file_path, html_content, download_error = download_user_page(username)
-                    if html_file_path and html_content:
-                        successful_html_files.append(html_file_path)
-                        st.success(f"✅ Successfully downloaded {username}'s page.")
-
-                        # Add expander to view HTML content
-                        with st.expander(f"View {username}'s HTML page"):
-                            st.markdown(html_content, unsafe_allow_html=True)
                     else:
-                        st.error(f"❌ Failed to download {username}'s page: {download_error}")
-
-                    stats["username"].append(username)
-                    stats["models_count"].append(len(models))
-                    stats["datasets_count"].append(len(datasets))
-
-                    with st.expander(f"🧠 Models ({len(models)})", expanded=False):
-                        if models:
-                            for model in models:
-                                model_name = model.modelId.split("/")[-1]
-                                st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
-                                if st.button(f"Download Snapshot: {model_name}", key=f"model_{model.modelId}"):
-                                    with st.spinner(f"Downloading snapshot for {model_name}..."):
-                                        result = perform_snapshot_download(model.modelId, "model")
-                                        if isinstance(result, str):
-                                            st.error(f"Failed to download {model_name}: {result}")
-                                        else:
-                                            snapshot_downloads.append(result)
-                                            st.success(f"Successfully downloaded snapshot for {model_name}")
                         else:
-                            st.markdown("No models found. 🤷‍♂️")
 
-                    with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
-                        if datasets:
-                            for dataset in datasets:
-                                dataset_name = dataset.id.split("/")[-1]
-                                st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
-                                if st.button(f"Download Snapshot: {dataset_name}", key=f"dataset_{dataset.id}"):
-                                    with st.spinner(f"Downloading snapshot for {dataset_name}..."):
-                                        result = perform_snapshot_download(dataset.id, "dataset")
-                                        if isinstance(result, str):
-                                            st.error(f"Failed to download {dataset_name}: {result}")
-                                        else:
-                                            snapshot_downloads.append(result)
-                                            st.success(f"Successfully downloaded snapshot for {dataset_name}")
-                        else:
-                            st.markdown("No datasets found. 🤷‍♀️")
-
-            st.markdown("---")
-
-        if successful_html_files:
-            html_zip_path = create_zip_of_files(successful_html_files, "HuggingFace_User_Pages.zip")
-            html_download_link = get_download_link(html_zip_path, "📥 Download All HTML Pages as ZIP")
-            st.markdown(html_download_link, unsafe_allow_html=True)
-        else:
-            st.warning("No HTML files were successfully downloaded to create a ZIP archive.")
-
-        if snapshot_downloads:
-            snapshot_zip_path = create_zip_of_files(snapshot_downloads, "HuggingFace_Snapshots.zip")
-            snapshot_download_link = get_download_link(snapshot_zip_path, "📥 Download All Snapshots as ZIP")
-            st.markdown(snapshot_download_link, unsafe_allow_html=True)
-
-        if stats["username"]:
-            st.markdown("### User Content Statistics")
 
-            fig_models = px.bar(
-                x=stats["username"],
-                y=stats["models_count"],
-                labels={'x': 'Username', 'y': 'Number of Models'},
-                title="Number of Models per User"
-            )
-            st.plotly_chart(fig_models)
 
-            fig_datasets = px.bar(
-                x=stats["username"],
-                y=stats["datasets_count"],
-                labels={'x': 'Username', 'y': 'Number of Datasets'},
-                title="Number of Datasets per User"
-            )
-            st.plotly_chart(fig_datasets)
 
-    else:
-        st.warning("Please enter at least one username. Don't be shy! 😅")
-
-st.sidebar.markdown("""
-## How to use:
-1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
-2. Click **'Show User Content and Download Snapshots'**.
-3. View each user's models and datasets along with a link to their Hugging Face profile.
-4. For each model or dataset, you can click the "Download Snapshot" button to download a snapshot.
-5. **Download ZIP archives** containing all the HTML pages and snapshots by clicking the download links.
-6. Check out the statistics visualizations below!
-7. **New feature:** You can now view the HTML content of each user's page by clicking on the expander.
-""")
+# New function to display HTML files in a grid
+def display_html_grid(html_files):
+    num_columns = 3 # You can adjust this number
+    for i in range(0, len(html_files), num_columns):
+        cols = st.columns(num_columns)
+        for j in range(num_columns):
+            if i + j < len(html_files):
+                with cols[j]:
+                    with open(html_files[i+j], 'r', encoding='utf-8') as file:
+                        html_content = file.read()
+                    soup = BeautifulSoup(html_content, 'html.parser')
+                    st.subheader(f"Page: {os.path.basename(html_files[i+j])}")
+                    st.components.v1.html(str(soup.body), height=300, scrolling=True)
 
+# New function to extract and display images from HTML
+def display_images_from_html(html_file):
+    with open(html_file, 'r', encoding='utf-8') as file:
+        html_content = file.read()
+    soup = BeautifulSoup(html_content, 'html.parser')
+    images = soup.find_all('img')
+    for img in images:
+        src = img.get('src')
+        if src and src.startswith('http'):
+            st.image(src, use_column_width=True)
 
+# New function to extract and display videos from HTML
+def display_videos_from_html(html_file):
+    with open(html_file, 'r', encoding='utf-8') as file:
+        html_content = file.read()
+    soup = BeautifulSoup(html_content, 'html.parser')
+    videos = soup.find_all('video')
+    for video in videos:
+        src = video.find('source').get('src')
+        if src and src.startswith('http'):
+            st.video(src)
+
+def main():
+    st.title("Hugging Face User Page Downloader & Zipper 📄➕📦")
+
+    user_input = st.text_area(
+        "Enter Hugging Face usernames (one per line):",
+        value="\n".join(default_users["users"]),
+        height=300
+    )
+
+    if st.button("Show User Content and Download Snapshots"):
+        if user_input:
+            username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
+
+            user_data_list = asyncio.run(fetch_all_users(username_list))
+
+            stats = {"username": [], "models_count": [], "datasets_count": []}
+            successful_html_files = []
+            snapshot_downloads = []
+
+            st.markdown("### User Content Overview")
+            for user_data in user_data_list:
+                username = user_data["username"]
+                with st.container():
+                    st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
 
+                    if "error" in user_data:
+                        st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
                     else:
+                        models = user_data["models"]
+                        datasets = user_data["datasets"]
+
+                        html_file_path, html_content, download_error = download_user_page(username)
+                        if html_file_path and html_content:
+                            successful_html_files.append(html_file_path)
+                            st.success(f" Successfully downloaded {username}'s page.")
+
+                            # Add expander to view HTML content
+                            with st.expander(f"View {username}'s HTML page"):
+                                st.markdown(html_content, unsafe_allow_html=True)
                         else:
+                            st.error(f" Failed to download {username}'s page: {download_error}")
+
+                        stats["username"].append(username)
+                        stats["models_count"].append(len(models))
+                        stats["datasets_count"].append(len(datasets))
+
+                        with st.expander(f"🧠 Models ({len(models)})", expanded=False):
+                            if models:
+                                for model in models:
+                                    model_name = model.modelId.split("/")[-1]
+                                    st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
+                                    if st.button(f"Download Snapshot: {model_name}", key=f"model_{model.modelId}"):
+                                        with st.spinner(f"Downloading snapshot for {model_name}..."):
+                                            result = perform_snapshot_download(model.modelId, "model")
+                                            if isinstance(result, str):
+                                                st.error(f"Failed to download {model_name}: {result}")
+                                            else:
+                                                snapshot_downloads.append(result)
+                                                st.success(f"Successfully downloaded snapshot for {model_name}")
+                            else:
+                                st.markdown("No models found. 🤷‍♂️")
+
+                        with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
+                            if datasets:
+                                for dataset in datasets:
+                                    dataset_name = dataset.id.split("/")[-1]
+                                    st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
+                                    if st.button(f"Download Snapshot: {dataset_name}", key=f"dataset_{dataset.id}"):
+                                        with st.spinner(f"Downloading snapshot for {dataset_name}..."):
+                                            result = perform_snapshot_download(dataset.id, "dataset")
+                                            if isinstance(result, str):
+                                                st.error(f"Failed to download {dataset_name}: {result}")
+                                            else:
+                                                snapshot_downloads.append(result)
+                                                st.success(f"Successfully downloaded snapshot for {dataset_name}")
+                            else:
+                                st.markdown("No datasets found. 🤷‍♀️")
 
+                st.markdown("---")
+
+            if successful_html_files:
+                st.markdown("### HTML Grid View")
+                display_html_grid(successful_html_files)
+
+                st.markdown("### Image Gallery")
+                for html_file in successful_html_files:
+                    display_images_from_html(html_file)
+
+                st.markdown("### Video Gallery")
+                for html_file in successful_html_files:
+                    display_videos_from_html(html_file)
+
+                html_zip_path = create_zip_of_files(successful_html_files, "HuggingFace_User_Pages.zip")
+                html_download_link = get_download_link(html_zip_path, "📥 Download All HTML Pages as ZIP")
+                st.markdown(html_download_link, unsafe_allow_html=True)
+            else:
+                st.warning("No HTML files were successfully downloaded to create a ZIP archive.")
 
+            if snapshot_downloads:
+                snapshot_zip_path = create_zip_of_files(snapshot_downloads, "HuggingFace_Snapshots.zip")
+                snapshot_download_link = get_download_link(snapshot_zip_path, "📥 Download All Snapshots as ZIP")
+                st.markdown(snapshot_download_link, unsafe_allow_html=True)
 
+            if stats["username"]:
+                st.markdown("### User Content Statistics")
+
+                fig_models = px.bar(
+                    x=stats["username"],
+                    y=stats["models_count"],
+                    labels={'x': 'Username', 'y': 'Number of Models'},
+                    title="Number of Models per User"
+                )
+                st.plotly_chart(fig_models)
+
+                fig_datasets = px.bar(
+                    x=stats["username"],
+                    y=stats["datasets_count"],
+                    labels={'x': 'Username', 'y': 'Number of Datasets'},
+                    title="Number of Datasets per User"
+                )
+                st.plotly_chart(fig_datasets)
 
+        else:
+            st.warning("Please enter at least one username. Don't be shy! 😅")
+
+    st.sidebar.markdown("""
+## How to use:
+1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
+2. Click **'Show User Content and Download Snapshots'**.
+3. View each user's models and datasets along with a link to their Hugging Face profile.
+4. For each model or dataset, you can click the "Download Snapshot" button to download a snapshot.
+5. **Download ZIP archives** containing all the HTML pages and snapshots by clicking the download links.
+6. Check out the statistics visualizations below!
+7. **New features:**
+- View all downloaded HTML pages in a grid layout
+- Browse through image and video galleries extracted from the HTML pages
+""")
+
+if __name__ == "__main__":
+    main()
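
Note (not part of the commit): the added imports suggest the Space now also depends on beautifulsoup4, Pillow, and pytz, which presumably need to be listed in its requirements.txt. The sketch below is a minimal, standalone exercise of the same BeautifulSoup media extraction that the new display_images_from_html and display_videos_from_html helpers rely on, run outside Streamlit; "saved_page.html" is a hypothetical saved profile page, and unlike the committed helper this version also guards against <video> tags that carry a src attribute instead of a <source> child.

# Standalone sketch of the media-URL extraction used by the new gallery helpers.
# Assumes beautifulsoup4 is installed and a file named "saved_page.html" exists locally.
from bs4 import BeautifulSoup

def extract_media_urls(html_path):
    with open(html_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')
    # Absolute image URLs, mirroring display_images_from_html
    image_urls = [img.get('src') for img in soup.find_all('img')
                  if img.get('src', '').startswith('http')]
    # Video URLs; fall back to the <video> tag's own src when there is no <source> child
    video_urls = []
    for video in soup.find_all('video'):
        source = video.find('source')
        src = source.get('src') if source else video.get('src')
        if src and src.startswith('http'):
            video_urls.append(src)
    return image_urls, video_urls

if __name__ == "__main__":
    images, videos = extract_media_urls("saved_page.html")
    print(f"Found {len(images)} image URLs and {len(videos)} video URLs")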