Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import os
|
|
5 |
import asyncio
|
6 |
from huggingface_hub import HfApi
|
7 |
import plotly.express as px
|
|
|
8 |
|
9 |
# Initialize the Hugging Face API
|
10 |
api = HfApi()
|
@@ -14,6 +15,11 @@ HTML_DIR = "generated_html_pages"
|
|
14 |
if not os.path.exists(HTML_DIR):
|
15 |
os.makedirs(HTML_DIR)
|
16 |
|
|
|
|
|
|
|
|
|
|
|
17 |
# Default list of Hugging Face usernames
|
18 |
default_users = {
|
19 |
"users": [
|
@@ -40,11 +46,6 @@ async def fetch_user_content(username):
|
|
40 |
except Exception as e:
|
41 |
return {"username": username, "error": str(e)}
|
42 |
|
43 |
-
# Run the per-user fetches concurrently
async def fetch_all_users(usernames):
    """Await fetch_user_content() for every username at the same time.

    Args:
        usernames: Iterable of usernames, each passed unchanged to
            fetch_user_content().

    Returns:
        List of the awaited results, in the same order as ``usernames``.
    """
    return await asyncio.gather(*map(fetch_user_content, usernames))
|
47 |
-
|
48 |
# Function to download the user page using requests
|
49 |
def download_user_page(username):
|
50 |
url = f"https://huggingface.co/{username}"
|
@@ -59,29 +60,45 @@ def download_user_page(username):
|
|
59 |
except Exception as e:
|
60 |
return None, str(e)
|
61 |
|
62 |
-
# Function to
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
-
#
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
82 |
|
83 |
# Streamlit app setup
|
84 |
-
st.title("Hugging Face User Page Downloader
|
85 |
|
86 |
# Text area with default list of usernames
|
87 |
user_input = st.text_area(
|
@@ -95,42 +112,41 @@ if st.button("Show User Content"):
|
|
95 |
if user_input:
|
96 |
username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
|
97 |
|
|
|
|
|
|
|
98 |
# Collect statistics for Plotly graphs
|
99 |
stats = {"username": [], "models_count": [], "datasets_count": []}
|
100 |
-
|
|
|
|
|
|
|
101 |
st.markdown("### User Content Overview")
|
102 |
-
for
|
|
|
103 |
with st.container():
|
104 |
# Profile link
|
105 |
st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
|
106 |
-
|
107 |
-
# Fetch models and datasets
|
108 |
-
user_data = asyncio.run(fetch_user_content(username))
|
109 |
if "error" in user_data:
|
110 |
st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
|
111 |
else:
|
112 |
models = user_data["models"]
|
113 |
datasets = user_data["datasets"]
|
114 |
-
|
115 |
-
#
|
116 |
-
|
117 |
-
if
|
118 |
-
|
119 |
-
|
120 |
-
st.download_button(
|
121 |
-
label=f"📥 Download {username}'s Base64 Encoded HTML",
|
122 |
-
data=base64_html,
|
123 |
-
file_name=b64_filename,
|
124 |
-
mime="text/plain"
|
125 |
-
)
|
126 |
else:
|
127 |
-
st.error(f"Failed to
|
128 |
-
|
129 |
# Add to statistics
|
130 |
stats["username"].append(username)
|
131 |
stats["models_count"].append(len(models))
|
132 |
stats["datasets_count"].append(len(datasets))
|
133 |
-
|
134 |
# Display models
|
135 |
with st.expander(f"🧠 Models ({len(models)})", expanded=False):
|
136 |
if models:
|
@@ -139,7 +155,7 @@ if st.button("Show User Content"):
|
|
139 |
st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
|
140 |
else:
|
141 |
st.markdown("No models found. 🤷♂️")
|
142 |
-
|
143 |
# Display datasets
|
144 |
with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
|
145 |
if datasets:
|
@@ -148,13 +164,24 @@ if st.button("Show User Content"):
|
|
148 |
st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
|
149 |
else:
|
150 |
st.markdown("No datasets found. 🤷♀️")
|
151 |
-
|
152 |
st.markdown("---")
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
# Plotly graphs to visualize the number of models and datasets each user has
|
155 |
if stats["username"]:
|
156 |
st.markdown("### User Content Statistics")
|
157 |
-
|
158 |
# Number of models per user
|
159 |
fig_models = px.bar(
|
160 |
x=stats["username"],
|
@@ -163,7 +190,7 @@ if st.button("Show User Content"):
|
|
163 |
title="Number of Models per User"
|
164 |
)
|
165 |
st.plotly_chart(fig_models)
|
166 |
-
|
167 |
# Number of datasets per user
|
168 |
fig_datasets = px.bar(
|
169 |
x=stats["username"],
|
@@ -172,7 +199,7 @@ if st.button("Show User Content"):
|
|
172 |
title="Number of Datasets per User"
|
173 |
)
|
174 |
st.plotly_chart(fig_datasets)
|
175 |
-
|
176 |
else:
|
177 |
st.warning("Please enter at least one username. Don't be shy! 😅")
|
178 |
|
@@ -182,6 +209,6 @@ st.sidebar.markdown("""
|
|
182 |
1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
|
183 |
2. Click **'Show User Content'**.
|
184 |
3. View each user's models and datasets along with a link to their Hugging Face profile.
|
185 |
-
4. **Download a
|
186 |
5. Check out the statistics visualizations below!
|
187 |
""")
|
|
|
5 |
import asyncio
|
6 |
from huggingface_hub import HfApi
|
7 |
import plotly.express as px
|
8 |
+
import zipfile # Importing zipfile to handle ZIP operations
|
9 |
|
10 |
# Initialize the Hugging Face API
|
11 |
api = HfApi()
|
|
|
15 |
if not os.path.exists(HTML_DIR):
|
16 |
os.makedirs(HTML_DIR)
|
17 |
|
18 |
+
# Directory to save the ZIP files
ZIP_DIR = "generated_zips"
# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of os.path.exists() followed by os.makedirs().
os.makedirs(ZIP_DIR, exist_ok=True)
|
22 |
+
|
23 |
# Default list of Hugging Face usernames
|
24 |
default_users = {
|
25 |
"users": [
|
|
|
46 |
except Exception as e:
|
47 |
return {"username": username, "error": str(e)}
|
48 |
|
|
|
|
|
|
|
|
|
|
|
49 |
# Function to download the user page using requests
|
50 |
def download_user_page(username):
|
51 |
url = f"https://huggingface.co/{username}"
|
|
|
60 |
except Exception as e:
|
61 |
return None, str(e)
|
62 |
|
63 |
+
# Function to create a ZIP archive of the HTML files
def create_zip_of_files(files):
    """Bundle the given files into a single ZIP archive under ZIP_DIR.

    Args:
        files: Iterable of paths to existing files. Each file is stored in
            the archive under its basename only, so directory structure is
            not preserved.

    Returns:
        Path of the written archive, ``ZIP_DIR/HuggingFace_User_Pages.zip``.
        An archive already present at that path is overwritten.
    """
    # NOTE: deliberately not decorated with @st.cache_resource. The point of
    # this function is its side effect (writing the archive); a cache keyed
    # on the list of paths would skip the rewrite when the same paths are
    # passed again, leaving a stale (or deleted) archive on disk.
    zip_name = "HuggingFace_User_Pages.zip"
    # The directory is created at import time, but make sure it still exists.
    os.makedirs(ZIP_DIR, exist_ok=True)
    zip_file_path = os.path.join(ZIP_DIR, zip_name)
    # Compress the entries: HTML shrinks well and the archive is later
    # embedded in the page as base64, so size matters.
    with zipfile.ZipFile(zip_file_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for file in files:
            # Add each HTML file to the ZIP archive with its basename
            zipf.write(file, arcname=os.path.basename(file))
    return zip_file_path
|
73 |
+
|
74 |
+
# Function to generate a download link for the ZIP file
def get_zip_download_link(zip_file):
    """Return an HTML anchor that downloads the given ZIP file.

    The file is read in full and embedded in the link as a base64
    ``data:application/zip`` URI, so the link works without a separate
    download endpoint.

    Args:
        zip_file: Path of the ZIP file to embed. Its basename becomes the
            suggested download file name.

    Returns:
        The ``<a ...>`` element as a string.

    Raises:
        OSError: If ``zip_file`` cannot be opened or read.
    """
    # NOTE: deliberately not decorated with @st.cache_resource. Such a cache
    # is keyed on the path argument only, so when the archive is rewritten
    # at the same path the link would keep serving the first archive's bytes.
    with open(zip_file, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    href = f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(zip_file)}">📥 Download All HTML Pages as ZIP</a>'
    return href
|
82 |
+
|
83 |
+
# Function to fetch all users concurrently
async def fetch_all_users(usernames):
    """Fetch the content of several users concurrently.

    One fetch_user_content() coroutine is created per username and all of
    them are awaited together.

    Args:
        usernames: Iterable of usernames, each passed unchanged to
            fetch_user_content().

    Returns:
        List of the awaited results, in the same order as ``usernames``.
    """
    pending = []
    for name in usernames:
        pending.append(fetch_user_content(name))
    results = await asyncio.gather(*pending)
    return results
|
87 |
|
88 |
+
# Function to get all HTML files for the selected users
def get_all_html_files(usernames):
    """Download the page of every user and sort the outcomes.

    Args:
        usernames: Iterable of usernames, each passed to
            download_user_page().

    Returns:
        A ``(html_files, errors)`` tuple. ``html_files`` lists the truthy
        first values returned by download_user_page(), in input order.
        ``errors`` maps each remaining username to the second value
        returned for it.
    """
    saved_paths, failures = [], {}
    for name in usernames:
        path, problem = download_user_page(name)
        if not path:
            # No usable file came back; remember what was reported instead.
            failures[name] = problem
            continue
        saved_paths.append(path)
    return saved_paths, failures
|
99 |
|
100 |
# Streamlit app setup
|
101 |
+
st.title("Hugging Face User Page Downloader & Zipper 📄➕📦")
|
102 |
|
103 |
# Text area with default list of usernames
|
104 |
user_input = st.text_area(
|
|
|
112 |
if user_input:
|
113 |
username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
|
114 |
|
115 |
+
# Fetch user content asynchronously
|
116 |
+
user_data_list = asyncio.run(fetch_all_users(username_list))
|
117 |
+
|
118 |
# Collect statistics for Plotly graphs
|
119 |
stats = {"username": [], "models_count": [], "datasets_count": []}
|
120 |
+
|
121 |
+
# List to store paths of successfully downloaded HTML files
|
122 |
+
successful_html_files = []
|
123 |
+
|
124 |
st.markdown("### User Content Overview")
|
125 |
+
for user_data in user_data_list:
|
126 |
+
username = user_data["username"]
|
127 |
with st.container():
|
128 |
# Profile link
|
129 |
st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
|
130 |
+
|
|
|
|
|
131 |
if "error" in user_data:
|
132 |
st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
|
133 |
else:
|
134 |
models = user_data["models"]
|
135 |
datasets = user_data["datasets"]
|
136 |
+
|
137 |
+
# Download the user's HTML page
|
138 |
+
html_file_path, download_error = download_user_page(username)
|
139 |
+
if html_file_path:
|
140 |
+
successful_html_files.append(html_file_path)
|
141 |
+
st.success(f"✅ Successfully downloaded {username}'s page.")
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
else:
|
143 |
+
st.error(f"❌ Failed to download {username}'s page: {download_error}")
|
144 |
+
|
145 |
# Add to statistics
|
146 |
stats["username"].append(username)
|
147 |
stats["models_count"].append(len(models))
|
148 |
stats["datasets_count"].append(len(datasets))
|
149 |
+
|
150 |
# Display models
|
151 |
with st.expander(f"🧠 Models ({len(models)})", expanded=False):
|
152 |
if models:
|
|
|
155 |
st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
|
156 |
else:
|
157 |
st.markdown("No models found. 🤷♂️")
|
158 |
+
|
159 |
# Display datasets
|
160 |
with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
|
161 |
if datasets:
|
|
|
164 |
st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
|
165 |
else:
|
166 |
st.markdown("No datasets found. 🤷♀️")
|
167 |
+
|
168 |
st.markdown("---")
|
169 |
+
|
170 |
+
# Check if there are any successfully downloaded HTML files to zip
|
171 |
+
if successful_html_files:
|
172 |
+
# Create a ZIP archive of the HTML files
|
173 |
+
zip_file_path = create_zip_of_files(successful_html_files)
|
174 |
+
|
175 |
+
# Generate a download link for the ZIP file
|
176 |
+
zip_download_link = get_zip_download_link(zip_file_path)
|
177 |
+
st.markdown(zip_download_link, unsafe_allow_html=True)
|
178 |
+
else:
|
179 |
+
st.warning("No HTML files were successfully downloaded to create a ZIP archive.")
|
180 |
+
|
181 |
# Plotly graphs to visualize the number of models and datasets each user has
|
182 |
if stats["username"]:
|
183 |
st.markdown("### User Content Statistics")
|
184 |
+
|
185 |
# Number of models per user
|
186 |
fig_models = px.bar(
|
187 |
x=stats["username"],
|
|
|
190 |
title="Number of Models per User"
|
191 |
)
|
192 |
st.plotly_chart(fig_models)
|
193 |
+
|
194 |
# Number of datasets per user
|
195 |
fig_datasets = px.bar(
|
196 |
x=stats["username"],
|
|
|
199 |
title="Number of Datasets per User"
|
200 |
)
|
201 |
st.plotly_chart(fig_datasets)
|
202 |
+
|
203 |
else:
|
204 |
st.warning("Please enter at least one username. Don't be shy! 😅")
|
205 |
|
|
|
209 |
1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
|
210 |
2. Click **'Show User Content'**.
|
211 |
3. View each user's models and datasets along with a link to their Hugging Face profile.
|
212 |
+
4. **Download a ZIP archive** containing all the HTML pages by clicking the download link.
|
213 |
5. Check out the statistics visualizations below!
|
214 |
""")
|