# Web-page header captured together with the file (not part of the program):
# Spaces: AzureModels4AI / PeopleModelsDatasets2X
# Sleeping
# File size: 7,426 Bytes
# 94380fb

import streamlit as st
import requests
import base64
import os
import asyncio
from huggingface_hub import HfApi
import plotly.express as px

# Initialize the Hugging Face API
# One module-level client instance; fetch_user_content calls its list_models
# and list_datasets methods.
api = HfApi()

# Directory to save the downloaded and generated files
HTML_DIR = "generated_html_pages"
# exist_ok=True creates the directory only when it is missing, without the
# window between a separate os.path.exists() check and the makedirs() call in
# which another process could create it and make makedirs() raise.
os.makedirs(HTML_DIR, exist_ok=True)

# Usernames pre-filled into the text area, stored under the "users" key
default_users = dict(
    users=[
        "awacke1",
        "rogerxavier",
        "jonatasgrosman",
        "kenshinn",
        "Csplk",
        "DavidVivancos",
        "cdminix",
        "Jaward",
        "TuringsSolutions",
        "Severian",
        "Wauplin",
        "phosseini",
        "Malikeh1375",
        "gokaygokay",
        "MoritzLaurer",
        "mrm8488",
        "TheBloke",
        "lhoestq",
        "xw-eric",
        "Paul",
        "Muennighoff",
        "ccdv",
        "haonan-li",
        "chansung",
        "lukaemon",
        "hails",
        "pharmapsychotic",
        "KingNish",
        "merve",
        "ameerazam08",
        "ashleykleynhans",
    ],
)

# Asynchronous function to fetch user content using Hugging Face API
async def fetch_user_content(username):
    """Fetch the models and datasets listed for ``username``.

    Args:
        username: Account name passed as ``author`` to ``api.list_models`` and
            ``api.list_datasets``.

    Returns:
        ``{"username", "models", "datasets"}`` with both listings as lists on
        success, or ``{"username", "error"}`` carrying the exception text when
        anything fails. The coroutine itself never raises for fetch errors.
    """
    def _collect(lister):
        # Consume the listing inside the worker thread. The list_* calls may
        # hand back a lazy iterator, and iterating it is what performs the
        # requests; doing that after to_thread() returns would block the
        # event loop instead of the worker.
        return list(lister(author=username))

    try:
        # Fetch models and datasets
        models = await asyncio.to_thread(_collect, api.list_models)
        datasets = await asyncio.to_thread(_collect, api.list_datasets)
        return {
            "username": username,
            "models": models,
            "datasets": datasets
        }
    except Exception as e:
        # Best-effort contract: report the failure to the caller as data.
        return {"username": username, "error": str(e)}

# Fetch all users concurrently
async def fetch_all_users(usernames):
    tasks = [fetch_user_content(username) for username in usernames]
    return await asyncio.gather(*tasks)

# Function to download the user page using requests
def download_user_page(username, timeout=10):
    """Download ``https://huggingface.co/<username>`` into ``HTML_DIR``.

    Args:
        username: Account name; it is interpolated into both the request URL
            and the output file name ``<username>.html``.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        ``(html_file_path, None)`` on success, or ``(None, error_message)``
        when the name is rejected, the request fails, or the file cannot be
        written.
    """
    # The name ends up inside a file path, so refuse anything that could
    # point outside HTML_DIR (path separators or the dot entries).
    if (
        not username
        or username in (".", "..")
        or "/" in username
        or "\\" in username
    ):
        return None, f"Invalid username: {username!r}"

    url = f"https://huggingface.co/{username}"
    try:
        # Without a timeout a stalled connection would block indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        html_content = response.text
        html_file_path = os.path.join(HTML_DIR, f"{username}.html")
        with open(html_file_path, "w", encoding='utf-8') as html_file:
            html_file.write(html_content)
        return html_file_path, None
    except Exception as e:
        # Callers rely on the (value, error) tuple rather than on exceptions.
        return None, str(e)

# Read a saved HTML file and return its contents as Base64 text
def encode_html_to_base64(html_file_path):
    """Base64-encode the bytes of the file at ``html_file_path``.

    Returns ``(encoded_text, None)`` on success, or ``(None, error_message)``
    if reading or encoding raises.
    """
    try:
        with open(html_file_path, "rb") as source:
            raw_bytes = source.read()
        return base64.b64encode(raw_bytes).decode('utf-8'), None
    except Exception as exc:
        return None, str(exc)

# Cache the downloaded and encoded content to avoid redundant operations
class _PageFetchError(Exception):
    """Signals a failed download/encode so the failure is not cached."""


@st.cache_data(show_spinner=False, ttl=3600)
def _load_base64_html(username):
    """Download and Base64-encode a user's page, raising on any failure.

    Raising instead of returning an error value keeps failures out of the
    cache: only a successful result is stored for the TTL.
    """
    html_file_path, error = download_user_page(username)
    if error:
        raise _PageFetchError(error)
    encoded_str, encode_error = encode_html_to_base64(html_file_path)
    if encode_error:
        raise _PageFetchError(encode_error)
    return encoded_str


def get_cached_base64_html(username):
    """Return the Base64-encoded profile page for ``username``.

    Successful results are cached for an hour. Failures are reported but not
    cached, so a transient download error is retried on the next call instead
    of being replayed until the cache entry expires.

    Returns:
        ``(encoded_str, None)`` on success, or ``(None, error_message)`` when
        the download or the encoding step fails.
    """
    try:
        return _load_base64_html(username), None
    except _PageFetchError as e:
        return None, str(e)


# Keep the cache-control handle reachable under the public name.
get_cached_base64_html.clear = _load_base64_html.clear

# Page title
st.title("Hugging Face User Page Downloader 📄✨")

# Editable username list, seeded with the defaults joined one per line
user_input = st.text_area(
    label="Enter Hugging Face usernames (one per line):",
    height=300,
    value="\n".join(default_users["users"]),
)

# Show User Content button
if st.button("Show User Content"):
    # Strip blank lines and drop repeated names while keeping first-seen
    # order. A repeated name would render two download buttons with identical
    # arguments (a duplicate-widget error in Streamlit) and would be counted
    # twice in the charts.
    username_list = list(dict.fromkeys(
        line.strip() for line in (user_input or "").split('\n') if line.strip()
    ))

    # Test the cleaned list rather than the raw text so whitespace-only input
    # also gets the warning instead of an empty overview.
    if username_list:
        # Collect statistics for Plotly graphs
        stats = {"username": [], "models_count": [], "datasets_count": []}

        st.markdown("### User Content Overview")
        for username in username_list:
            with st.container():
                # Profile link
                st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")

                # Fetch models and datasets
                user_data = asyncio.run(fetch_user_content(username))
                if "error" in user_data:
                    st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
                else:
                    models = user_data["models"]
                    datasets = user_data["datasets"]

                    # Encode the downloaded HTML page to base64
                    base64_html, encode_error = get_cached_base64_html(username)
                    if base64_html:
                        # Provide a download link for the base64-encoded HTML
                        b64_filename = f"{username}_base64.txt"
                        st.download_button(
                            label=f"📥 Download {username}'s Base64 Encoded HTML",
                            data=base64_html,
                            file_name=b64_filename,
                            mime="text/plain"
                        )
                    else:
                        st.error(f"Failed to encode HTML for {username}: {encode_error}")

                    # Add to statistics (only users whose listing succeeded)
                    stats["username"].append(username)
                    stats["models_count"].append(len(models))
                    stats["datasets_count"].append(len(datasets))

                    # Display models
                    with st.expander(f"🧠 Models ({len(models)})", expanded=False):
                        if models:
                            for model in models:
                                # Show only the repo name; link uses the full id
                                model_name = model.modelId.split("/")[-1]
                                st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
                        else:
                            st.markdown("No models found. 🤷‍♂️")

                    # Display datasets
                    with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
                        if datasets:
                            for dataset in datasets:
                                dataset_name = dataset.id.split("/")[-1]
                                st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
                        else:
                            st.markdown("No datasets found. 🤷‍♀️")

                st.markdown("---")

        # Plotly graphs to visualize the number of models and datasets each user has
        if stats["username"]:
            st.markdown("### User Content Statistics")

            # Number of models per user
            fig_models = px.bar(
                x=stats["username"],
                y=stats["models_count"],
                labels={'x': 'Username', 'y': 'Number of Models'},
                title="Number of Models per User"
            )
            st.plotly_chart(fig_models)

            # Number of datasets per user
            fig_datasets = px.bar(
                x=stats["username"],
                y=stats["datasets_count"],
                labels={'x': 'Username', 'y': 'Number of Datasets'},
                title="Number of Datasets per User"
            )
            st.plotly_chart(fig_datasets)

    else:
        st.warning("Please enter at least one username. Don't be shy! 😅")

# Usage notes rendered in the sidebar
with st.sidebar:
    st.markdown("""
## How to use:
1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
2. Click **'Show User Content'**.
3. View each user's models and datasets along with a link to their Hugging Face profile.
4. **Download a base64-encoded HTML page** for each user by clicking the download button.
5. Check out the statistics visualizations below!
""")