awacke1 committed on
Commit
d34785a
·
verified ·
1 Parent(s): 94380fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -52
app.py CHANGED
@@ -5,6 +5,7 @@ import os
5
  import asyncio
6
  from huggingface_hub import HfApi
7
  import plotly.express as px
 
8
 
9
  # Initialize the Hugging Face API
10
  api = HfApi()
@@ -14,6 +15,11 @@ HTML_DIR = "generated_html_pages"
14
  if not os.path.exists(HTML_DIR):
15
  os.makedirs(HTML_DIR)
16
 
 
 
 
 
 
17
  # Default list of Hugging Face usernames
18
  default_users = {
19
  "users": [
@@ -40,11 +46,6 @@ async def fetch_user_content(username):
40
  except Exception as e:
41
  return {"username": username, "error": str(e)}
42
 
43
- # Fetch all users concurrently
44
- async def fetch_all_users(usernames):
45
- tasks = [fetch_user_content(username) for username in usernames]
46
- return await asyncio.gather(*tasks)
47
-
48
  # Function to download the user page using requests
49
  def download_user_page(username):
50
  url = f"https://huggingface.co/{username}"
@@ -59,29 +60,45 @@ def download_user_page(username):
59
  except Exception as e:
60
  return None, str(e)
61
 
62
- # Function to base64 encode the HTML file
63
- def encode_html_to_base64(html_file_path):
64
- try:
65
- with open(html_file_path, "rb") as file:
66
- encoded_bytes = base64.b64encode(file.read())
67
- encoded_str = encoded_bytes.decode('utf-8')
68
- return encoded_str, None
69
- except Exception as e:
70
- return None, str(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # Cache the downloaded and encoded content to avoid redundant operations
73
- @st.cache_data(show_spinner=False, ttl=3600)
74
- def get_cached_base64_html(username):
75
- html_file_path, error = download_user_page(username)
76
- if error:
77
- return None, error
78
- encoded_str, encode_error = encode_html_to_base64(html_file_path)
79
- if encode_error:
80
- return None, encode_error
81
- return encoded_str, None
 
82
 
83
  # Streamlit app setup
84
- st.title("Hugging Face User Page Downloader 📄✨")
85
 
86
  # Text area with default list of usernames
87
  user_input = st.text_area(
@@ -95,42 +112,41 @@ if st.button("Show User Content"):
95
  if user_input:
96
  username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
97
 
 
 
 
98
  # Collect statistics for Plotly graphs
99
  stats = {"username": [], "models_count": [], "datasets_count": []}
100
-
 
 
 
101
  st.markdown("### User Content Overview")
102
- for username in username_list:
 
103
  with st.container():
104
  # Profile link
105
  st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
106
-
107
- # Fetch models and datasets
108
- user_data = asyncio.run(fetch_user_content(username))
109
  if "error" in user_data:
110
  st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
111
  else:
112
  models = user_data["models"]
113
  datasets = user_data["datasets"]
114
-
115
- # Encode the downloaded HTML page to base64
116
- base64_html, encode_error = get_cached_base64_html(username)
117
- if base64_html:
118
- # Provide a download link for the base64-encoded HTML
119
- b64_filename = f"{username}_base64.txt"
120
- st.download_button(
121
- label=f"📥 Download {username}'s Base64 Encoded HTML",
122
- data=base64_html,
123
- file_name=b64_filename,
124
- mime="text/plain"
125
- )
126
  else:
127
- st.error(f"Failed to encode HTML for {username}: {encode_error}")
128
-
129
  # Add to statistics
130
  stats["username"].append(username)
131
  stats["models_count"].append(len(models))
132
  stats["datasets_count"].append(len(datasets))
133
-
134
  # Display models
135
  with st.expander(f"🧠 Models ({len(models)})", expanded=False):
136
  if models:
@@ -139,7 +155,7 @@ if st.button("Show User Content"):
139
  st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
140
  else:
141
  st.markdown("No models found. 🤷‍♂️")
142
-
143
  # Display datasets
144
  with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
145
  if datasets:
@@ -148,13 +164,24 @@ if st.button("Show User Content"):
148
  st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
149
  else:
150
  st.markdown("No datasets found. 🤷‍♀️")
151
-
152
  st.markdown("---")
153
-
 
 
 
 
 
 
 
 
 
 
 
154
  # Plotly graphs to visualize the number of models and datasets each user has
155
  if stats["username"]:
156
  st.markdown("### User Content Statistics")
157
-
158
  # Number of models per user
159
  fig_models = px.bar(
160
  x=stats["username"],
@@ -163,7 +190,7 @@ if st.button("Show User Content"):
163
  title="Number of Models per User"
164
  )
165
  st.plotly_chart(fig_models)
166
-
167
  # Number of datasets per user
168
  fig_datasets = px.bar(
169
  x=stats["username"],
@@ -172,7 +199,7 @@ if st.button("Show User Content"):
172
  title="Number of Datasets per User"
173
  )
174
  st.plotly_chart(fig_datasets)
175
-
176
  else:
177
  st.warning("Please enter at least one username. Don't be shy! 😅")
178
 
@@ -182,6 +209,6 @@ st.sidebar.markdown("""
182
  1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
183
  2. Click **'Show User Content'**.
184
  3. View each user's models and datasets along with a link to their Hugging Face profile.
185
- 4. **Download a base64-encoded HTML page** for each user by clicking the download button.
186
  5. Check out the statistics visualizations below!
187
  """)
 
5
  import asyncio
6
  from huggingface_hub import HfApi
7
  import plotly.express as px
8
+ import zipfile # Importing zipfile to handle ZIP operations
9
 
10
  # Initialize the Hugging Face API
11
  api = HfApi()
 
15
  if not os.path.exists(HTML_DIR):
16
  os.makedirs(HTML_DIR)
17
 
18
# Directory to save the ZIP files.
ZIP_DIR = "generated_zips"
# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of `if not os.path.exists(...): os.makedirs(...)` when several
# script runs start at the same time.
os.makedirs(ZIP_DIR, exist_ok=True)
22
+
23
  # Default list of Hugging Face usernames
24
  default_users = {
25
  "users": [
 
46
  except Exception as e:
47
  return {"username": username, "error": str(e)}
48
 
 
 
 
 
 
49
  # Function to download the user page using requests
50
  def download_user_page(username):
51
  url = f"https://huggingface.co/{username}"
 
60
  except Exception as e:
61
  return None, str(e)
62
 
63
def create_zip_of_files(files):
    """Bundle the given files into a single ZIP archive inside ZIP_DIR.

    The archive is always written to the same path
    (``ZIP_DIR/HuggingFace_User_Pages.zip``) and is rebuilt on every call.
    This function is deliberately not cached: a cache keyed on ``files``
    would return the path on a hit without rewriting the archive, even when
    a call with a different file list has since overwritten it.

    Args:
        files: Iterable of paths to the files to include. Each file is
            stored under its basename; a later path whose basename was
            already added is skipped so the archive has no duplicate members.

    Returns:
        The path of the ZIP archive that was written.
    """
    # Local import keeps this block self-contained regardless of the
    # file-level import list.
    import zipfile

    zip_name = "HuggingFace_User_Pages.zip"
    zip_file_path = os.path.join(ZIP_DIR, zip_name)

    added_members = set()
    # HTML is highly compressible, so use DEFLATE instead of the default
    # ZIP_STORED (no compression).
    with zipfile.ZipFile(zip_file_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for file_path in files:
            member_name = os.path.basename(file_path)
            if member_name in added_members:
                continue
            added_members.add(member_name)
            zipf.write(file_path, arcname=member_name)
    return zip_file_path
73
+
74
def get_zip_download_link(zip_file):
    """Return an HTML anchor that downloads ``zip_file`` via a data URI.

    The file is read and base64-encoded on every call. This function is
    deliberately not cached: the archive is rewritten at the same path each
    time, so a cache keyed on the path would keep serving the first
    archive's bytes.

    Args:
        zip_file: Path to the ZIP archive to embed.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:application/zip``
        URI and whose ``download`` attribute is the archive's basename.
    """
    # Local import keeps this block self-contained regardless of the
    # file-level import list.
    import html

    with open(zip_file, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    # Escape the filename so quotes or ampersands cannot break the attribute.
    download_name = html.escape(os.path.basename(zip_file), quote=True)
    href = f'<a href="data:application/zip;base64,{b64}" download="{download_name}">📥 Download All HTML Pages as ZIP</a>'
    return href
82
+
83
async def fetch_all_users(usernames):
    """Fetch content for every username concurrently.

    One ``fetch_user_content`` coroutine is created per username and all of
    them are awaited together; results come back in the same order as
    ``usernames``.
    """
    pending = (fetch_user_content(name) for name in usernames)
    return await asyncio.gather(*pending)
87
 
88
def get_all_html_files(usernames):
    """Download the profile page of each user and sort results by outcome.

    Returns:
        A ``(html_files, errors)`` tuple: ``html_files`` lists the paths
        returned by ``download_user_page`` for successful downloads, and
        ``errors`` maps each failing username to its reported error.
    """
    downloaded_paths = []
    failures = {}
    for name in usernames:
        page_path, problem = download_user_page(name)
        if not page_path:
            # No file path came back: record why and move on.
            failures[name] = problem
            continue
        downloaded_paths.append(page_path)
    return downloaded_paths, failures
99
 
100
  # Streamlit app setup
101
+ st.title("Hugging Face User Page Downloader & Zipper 📄➕📦")
102
 
103
  # Text area with default list of usernames
104
  user_input = st.text_area(
 
112
  if user_input:
113
  username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
114
 
115
+ # Fetch user content asynchronously
116
+ user_data_list = asyncio.run(fetch_all_users(username_list))
117
+
118
  # Collect statistics for Plotly graphs
119
  stats = {"username": [], "models_count": [], "datasets_count": []}
120
+
121
+ # List to store paths of successfully downloaded HTML files
122
+ successful_html_files = []
123
+
124
  st.markdown("### User Content Overview")
125
+ for user_data in user_data_list:
126
+ username = user_data["username"]
127
  with st.container():
128
  # Profile link
129
  st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
130
+
 
 
131
  if "error" in user_data:
132
  st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
133
  else:
134
  models = user_data["models"]
135
  datasets = user_data["datasets"]
136
+
137
+ # Download the user's HTML page
138
+ html_file_path, download_error = download_user_page(username)
139
+ if html_file_path:
140
+ successful_html_files.append(html_file_path)
141
+ st.success(f"✅ Successfully downloaded {username}'s page.")
 
 
 
 
 
 
142
  else:
143
+ st.error(f"Failed to download {username}'s page: {download_error}")
144
+
145
  # Add to statistics
146
  stats["username"].append(username)
147
  stats["models_count"].append(len(models))
148
  stats["datasets_count"].append(len(datasets))
149
+
150
  # Display models
151
  with st.expander(f"🧠 Models ({len(models)})", expanded=False):
152
  if models:
 
155
  st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
156
  else:
157
  st.markdown("No models found. 🤷‍♂️")
158
+
159
  # Display datasets
160
  with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
161
  if datasets:
 
164
  st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
165
  else:
166
  st.markdown("No datasets found. 🤷‍♀️")
167
+
168
  st.markdown("---")
169
+
170
+ # Check if there are any successfully downloaded HTML files to zip
171
+ if successful_html_files:
172
+ # Create a ZIP archive of the HTML files
173
+ zip_file_path = create_zip_of_files(successful_html_files)
174
+
175
+ # Generate a download link for the ZIP file
176
+ zip_download_link = get_zip_download_link(zip_file_path)
177
+ st.markdown(zip_download_link, unsafe_allow_html=True)
178
+ else:
179
+ st.warning("No HTML files were successfully downloaded to create a ZIP archive.")
180
+
181
  # Plotly graphs to visualize the number of models and datasets each user has
182
  if stats["username"]:
183
  st.markdown("### User Content Statistics")
184
+
185
  # Number of models per user
186
  fig_models = px.bar(
187
  x=stats["username"],
 
190
  title="Number of Models per User"
191
  )
192
  st.plotly_chart(fig_models)
193
+
194
  # Number of datasets per user
195
  fig_datasets = px.bar(
196
  x=stats["username"],
 
199
  title="Number of Datasets per User"
200
  )
201
  st.plotly_chart(fig_datasets)
202
+
203
  else:
204
  st.warning("Please enter at least one username. Don't be shy! 😅")
205
 
 
209
  1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
210
  2. Click **'Show User Content'**.
211
  3. View each user's models and datasets along with a link to their Hugging Face profile.
212
+ 4. **Download a ZIP archive** containing all the HTML pages by clicking the download link.
213
  5. Check out the statistics visualizations below!
214
  """)