Spaces:

AzureModels4AI
/

PeopleModelsDatasets2X

Sleeping

App Files Files Community

awacke1 committed on Sep 17, 2024

Commit

d34785a

verified ·

1 Parent(s): 94380fb

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -52

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import os
 import asyncio
 from huggingface_hub import HfApi
 import plotly.express as px
 # Initialize the Hugging Face API
 api = HfApi()
@@ -14,6 +15,11 @@ HTML_DIR = "generated_html_pages"
 if not os.path.exists(HTML_DIR):
     os.makedirs(HTML_DIR)
 # Default list of Hugging Face usernames
 default_users = {
     "users": [
@@ -40,11 +46,6 @@ async def fetch_user_content(username):
     except Exception as e:
         return {"username": username, "error": str(e)}
-# Fetch all users concurrently
-async def fetch_all_users(usernames):
-    tasks = [fetch_user_content(username) for username in usernames]
-    return await asyncio.gather(*tasks)
 # Function to download the user page using requests
 def download_user_page(username):
     url = f"https://huggingface.co/{username}"
@@ -59,29 +60,45 @@ def download_user_page(username):
     except Exception as e:
         return None, str(e)
-# Function to base64 encode the HTML file
-def encode_html_to_base64(html_file_path):
-    try:
-        with open(html_file_path, "rb") as file:
-            encoded_bytes = base64.b64encode(file.read())
-            encoded_str = encoded_bytes.decode('utf-8')
-        return encoded_str, None
-    except Exception as e:
-        return None, str(e)
-# Cache the downloaded and encoded content to avoid redundant operations
-@st.cache_data(show_spinner=False, ttl=3600)
-def get_cached_base64_html(username):
-    html_file_path, error = download_user_page(username)
-    if error:
-        return None, error
-    encoded_str, encode_error = encode_html_to_base64(html_file_path)
-    if encode_error:
-        return None, encode_error
-    return encoded_str, None
 # Streamlit app setup
-st.title("Hugging Face User Page Downloader 📄✨")
 # Text area with default list of usernames
 user_input = st.text_area(
@@ -95,42 +112,41 @@ if st.button("Show User Content"):
     if user_input:
         username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
         # Collect statistics for Plotly graphs
         stats = {"username": [], "models_count": [], "datasets_count": []}
         st.markdown("### User Content Overview")
-        for username in username_list:
             with st.container():
                 # Profile link
                 st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
-                # Fetch models and datasets
-                user_data = asyncio.run(fetch_user_content(username))
                 if "error" in user_data:
                     st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
                 else:
                     models = user_data["models"]
                     datasets = user_data["datasets"]
-                    # Encode the downloaded HTML page to base64
-                    base64_html, encode_error = get_cached_base64_html(username)
-                    if base64_html:
-                        # Provide a download link for the base64-encoded HTML
-                        b64_filename = f"{username}_base64.txt"
-                        st.download_button(
-                            label=f"📥 Download {username}'s Base64 Encoded HTML",
-                            data=base64_html,
-                            file_name=b64_filename,
-                            mime="text/plain"
-                        )
                     else:
-                        st.error(f"Failed to encode HTML for {username}: {encode_error}")
                     # Add to statistics
                     stats["username"].append(username)
                     stats["models_count"].append(len(models))
                     stats["datasets_count"].append(len(datasets))
                     # Display models
                     with st.expander(f"🧠 Models ({len(models)})", expanded=False):
                         if models:
@@ -139,7 +155,7 @@ if st.button("Show User Content"):
                                 st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
                         else:
                             st.markdown("No models found. 🤷‍♂️")
                     # Display datasets
                     with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
                         if datasets:
@@ -148,13 +164,24 @@ if st.button("Show User Content"):
                                 st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
                         else:
                             st.markdown("No datasets found. 🤷‍♀️")
                 st.markdown("---")
         # Plotly graphs to visualize the number of models and datasets each user has
         if stats["username"]:
             st.markdown("### User Content Statistics")
             # Number of models per user
             fig_models = px.bar(
                 x=stats["username"],
@@ -163,7 +190,7 @@ if st.button("Show User Content"):
                 title="Number of Models per User"
             )
             st.plotly_chart(fig_models)
             # Number of datasets per user
             fig_datasets = px.bar(
                 x=stats["username"],
@@ -172,7 +199,7 @@ if st.button("Show User Content"):
                 title="Number of Datasets per User"
             )
             st.plotly_chart(fig_datasets)
     else:
         st.warning("Please enter at least one username. Don't be shy! 😅")
@@ -182,6 +209,6 @@ st.sidebar.markdown("""
 1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
 2. Click **'Show User Content'**.
 3. View each user's models and datasets along with a link to their Hugging Face profile.
-4. **Download a base64-encoded HTML page** for each user by clicking the download button.
 5. Check out the statistics visualizations below!
 """)

 import asyncio
 from huggingface_hub import HfApi
 import plotly.express as px
+import zipfile  # Importing zipfile to handle ZIP operations
 # Initialize the Hugging Face API
 api = HfApi()
 if not os.path.exists(HTML_DIR):
     os.makedirs(HTML_DIR)
+# Directory to save the ZIP files
+ZIP_DIR = "generated_zips"
+if not os.path.exists(ZIP_DIR):
+    os.makedirs(ZIP_DIR)
 # Default list of Hugging Face usernames
 default_users = {
     "users": [
     except Exception as e:
         return {"username": username, "error": str(e)}
 # Function to download the user page using requests
 def download_user_page(username):
     url = f"https://huggingface.co/{username}"
     except Exception as e:
         return None, str(e)
# Function to create a ZIP archive of the HTML files
def create_zip_of_files(files):
    """Bundle the given files into one ZIP archive inside ``ZIP_DIR``.

    The archive is always written to ``ZIP_DIR/HuggingFace_User_Pages.zip``
    and any previous archive at that path is overwritten.

    Args:
        files: Iterable of paths to the files to archive. Each file is stored
            under its basename only. A path listed more than once is added a
            single time.

    Returns:
        The path of the archive that was written.
    """
    # NOTE: deliberately not wrapped in a Streamlit cache. The output path is
    # fixed, so a cache keyed on ``files`` would hand back that path without
    # rebuilding the archive, even when another call has overwritten it or the
    # source files have changed on disk in the meantime.
    zip_name = "HuggingFace_User_Pages.zip"
    zip_file_path = os.path.join(ZIP_DIR, zip_name)
    # The directory may have been removed since start-up; recreate it if so.
    os.makedirs(ZIP_DIR, exist_ok=True)
    with zipfile.ZipFile(zip_file_path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
        # dict.fromkeys drops repeated paths while keeping first-seen order,
        # so the same page is never written into the archive twice.
        for file in dict.fromkeys(files):
            zipf.write(file, arcname=os.path.basename(file))
    return zip_file_path
# Function to generate a download link for the ZIP file
def get_zip_download_link(zip_file):
    """Return an HTML anchor that downloads ``zip_file`` via a data URI.

    The file's bytes are read and base64-encoded into the ``href`` itself, so
    the returned string grows with the size of the archive.

    Args:
        zip_file: Path to the ZIP file to embed.

    Returns:
        An ``<a>`` element string whose ``download`` attribute is the file's
        basename.
    """
    # NOTE: deliberately not wrapped in a Streamlit cache. A cache keyed on
    # the path alone would keep returning the bytes read on the first call,
    # even after the file at that path has been rewritten.
    with open(zip_file, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    href = f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(zip_file)}">📥 Download All HTML Pages as ZIP</a>'
    return href
# Function to fetch all users concurrently
async def fetch_all_users(usernames):
    """Await ``fetch_user_content`` for every username and collect the results.

    Args:
        usernames: Iterable of usernames to look up.

    Returns:
        A list with one ``fetch_user_content`` result per username, in the
        same order as ``usernames``.
    """
    pending = []
    for name in usernames:
        # Calling the coroutine function only creates the awaitable;
        # asyncio.gather below is what actually drives them.
        pending.append(fetch_user_content(name))
    results = await asyncio.gather(*pending)
    return results
# Function to get all HTML files for the selected users
def get_all_html_files(usernames):
    """Download each user's page and split the outcomes into hits and misses.

    Args:
        usernames: Iterable of usernames whose pages should be downloaded.

    Returns:
        A ``(html_files, errors)`` tuple: ``html_files`` lists the paths
        returned by ``download_user_page`` for successful downloads, and
        ``errors`` maps each failed username to the error it reported.
    """
    saved_pages = []
    failures = {}
    for name in usernames:
        page_path, problem = download_user_page(name)
        if not page_path:
            # No usable path came back: record the reported error instead.
            failures[name] = problem
            continue
        saved_pages.append(page_path)
    return saved_pages, failures
 # Streamlit app setup
+st.title("Hugging Face User Page Downloader & Zipper 📄➕📦")
 # Text area with default list of usernames
 user_input = st.text_area(
     if user_input:
         username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
+        # Fetch user content asynchronously
+        user_data_list = asyncio.run(fetch_all_users(username_list))
         # Collect statistics for Plotly graphs
         stats = {"username": [], "models_count": [], "datasets_count": []}
+        # List to store paths of successfully downloaded HTML files
+        successful_html_files = []
         st.markdown("### User Content Overview")
+        for user_data in user_data_list:
+            username = user_data["username"]
             with st.container():
                 # Profile link
                 st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
                 if "error" in user_data:
                     st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
                 else:
                     models = user_data["models"]
                     datasets = user_data["datasets"]
+                    # Download the user's HTML page
+                    html_file_path, download_error = download_user_page(username)
+                    if html_file_path:
+                        successful_html_files.append(html_file_path)
+                        st.success(f"✅ Successfully downloaded {username}'s page.")
                     else:
+                        st.error(f"❌ Failed to download {username}'s page: {download_error}")
                     # Add to statistics
                     stats["username"].append(username)
                     stats["models_count"].append(len(models))
                     stats["datasets_count"].append(len(datasets))
                     # Display models
                     with st.expander(f"🧠 Models ({len(models)})", expanded=False):
                         if models:
                                 st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
                         else:
                             st.markdown("No models found. 🤷‍♂️")
                     # Display datasets
                     with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
                         if datasets:
                                 st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
                         else:
                             st.markdown("No datasets found. 🤷‍♀️")
                 st.markdown("---")
+        # Check if there are any successfully downloaded HTML files to zip
+        if successful_html_files:
+            # Create a ZIP archive of the HTML files
+            zip_file_path = create_zip_of_files(successful_html_files)
+            # Generate a download link for the ZIP file
+            zip_download_link = get_zip_download_link(zip_file_path)
+            st.markdown(zip_download_link, unsafe_allow_html=True)
+        else:
+            st.warning("No HTML files were successfully downloaded to create a ZIP archive.")
         # Plotly graphs to visualize the number of models and datasets each user has
         if stats["username"]:
             st.markdown("### User Content Statistics")
             # Number of models per user
             fig_models = px.bar(
                 x=stats["username"],
                 title="Number of Models per User"
             )
             st.plotly_chart(fig_models)
             # Number of datasets per user
             fig_datasets = px.bar(
                 x=stats["username"],
                 title="Number of Datasets per User"
             )
             st.plotly_chart(fig_datasets)
     else:
         st.warning("Please enter at least one username. Don't be shy! 😅")
 1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
 2. Click **'Show User Content'**.
 3. View each user's models and datasets along with a link to their Hugging Face profile.
+4. **Download a ZIP archive** containing all the HTML pages by clicking the download link.
 5. Check out the statistics visualizations below!
 """)