Spaces:

Gyaneshere
/

MyActivity

Sleeping

App Files Files Community

Gyaneshere committed on Mar 30

Commit

c5420f7

verified ·

1 Parent(s): bfe26bd

Create app.py

Browse files

Files changed (1) hide show

app.py +273 -0

app.py ADDED Viewed

	@@ -0,0 +1,273 @@

+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+from functools import lru_cache
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+import streamlit as st
+from huggingface_hub import HfApi
+# Page-level Streamlit settings: browser tab title and wide layout.
+# NOTE(review): Streamlit expects this before other st.* rendering calls — keep it first.
+st.set_page_config(page_title="HF Contributions", layout="wide")
+# Single Hub client shared by the cached_* helper functions in this file.
+api = HfApi()
+# Cache for API responses
+@lru_cache(maxsize=1000)
+def cached_repo_info(repo_id, repo_type):
+    return api.repo_info(repo_id=repo_id, repo_type=repo_type)
+@lru_cache(maxsize=1000)
+def cached_list_commits(repo_id, repo_type):
+    return list(api.list_repo_commits(repo_id=repo_id, repo_type=repo_type))
+@lru_cache(maxsize=100)
+def cached_list_items(username, kind):
+    if kind == "model":
+        return list(api.list_models(author=username))
+    elif kind == "dataset":
+        return list(api.list_datasets(author=username))
+    elif kind == "space":
+        return list(api.list_spaces(author=username))
+    return []
+# Rate limiting
+class RateLimiter:
+    def __init__(self, calls_per_second=10):
+        self.calls_per_second = calls_per_second
+        self.last_call = 0
+    def wait(self):
+        current_time = time.time()
+        time_since_last_call = current_time - self.last_call
+        if time_since_last_call < (1.0 / self.calls_per_second):
+            time.sleep((1.0 / self.calls_per_second) - time_since_last_call)
+        self.last_call = time.time()
+rate_limiter = RateLimiter()
+# Function to fetch commits for a repository (optimized)
+def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
+    try:
+        rate_limiter.wait()
+        # Skip private/gated repos upfront
+        repo_info = cached_repo_info(repo_id, repo_type)
+        if repo_info.private or (hasattr(repo_info, 'gated') and repo_info.gated):
+            return [], []
+        # Get initial commit date
+        initial_commit_date = pd.to_datetime(repo_info.created_at).tz_localize(None).date()
+        commit_dates = []
+        commit_count = 0
+        # Add initial commit if it's from the selected year
+        if initial_commit_date.year == selected_year:
+            commit_dates.append(initial_commit_date)
+            commit_count += 1
+        # Get all commits
+        commits = cached_list_commits(repo_id, repo_type)
+        for commit in commits:
+            commit_date = pd.to_datetime(commit.created_at).tz_localize(None).date()
+            if commit_date.year == selected_year:
+                commit_dates.append(commit_date)
+                commit_count += 1
+        return commit_dates, commit_count
+    except Exception:
+        return [], 0
+# Function to get commit events for a user (optimized)
+def get_commit_events(username, kind=None, selected_year=None):
+    commit_dates = []
+    items_with_type = []
+    kinds = [kind] if kind else ["model", "dataset", "space"]
+    for k in kinds:
+        try:
+            items = cached_list_items(username, k)
+            items_with_type.extend((item, k) for item in items)
+            repo_ids = [item.id for item in items]
+            # Optimized parallel fetch with chunking
+            chunk_size = 5  # Process 5 repos at a time
+            for i in range(0, len(repo_ids), chunk_size):
+                chunk = repo_ids[i:i + chunk_size]
+                with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
+                    future_to_repo = {
+                        executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year): repo_id
+                        for repo_id in chunk
+                    }
+                    for future in as_completed(future_to_repo):
+                        repo_commits, repo_count = future.result()
+                        if repo_commits:  # Only extend if we got commits
+                            commit_dates.extend(repo_commits)
+        except Exception as e:
+            st.warning(f"Error fetching {k}s for {username}: {str(e)}")
+    # Create DataFrame with all commits
+    df = pd.DataFrame(commit_dates, columns=["date"])
+    if not df.empty:
+        df = df.drop_duplicates()  # Remove any duplicate dates
+    return df, items_with_type
+# Calendar heatmap function (optimized)
+def make_calendar_heatmap(df, title, year):
+    if df.empty:
+        st.info(f"No {title.lower()} found for {year}.")
+        return
+    # Optimize DataFrame operations
+    df["count"] = 1
+    df = df.groupby("date", as_index=False).sum()
+    df["date"] = pd.to_datetime(df["date"])
+    # Create date range more efficiently
+    start = pd.Timestamp(f"{year}-01-01")
+    end = pd.Timestamp(f"{year}-12-31")
+    all_days = pd.date_range(start=start, end=end)
+    # Optimize DataFrame creation and merging
+    heatmap_data = pd.DataFrame({"date": all_days, "count": 0})
+    heatmap_data = heatmap_data.merge(df, on="date", how="left", suffixes=("", "_y"))
+    heatmap_data["count"] = heatmap_data["count_y"].fillna(0)
+    heatmap_data = heatmap_data.drop("count_y", axis=1)
+    # Calculate week and day of week more efficiently
+    heatmap_data["dow"] = heatmap_data["date"].dt.dayofweek
+    heatmap_data["week"] = (heatmap_data["date"] - start).dt.days // 7
+    # Create pivot table more efficiently
+    pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0)
+    # Optimize month labels calculation
+    month_labels = pd.date_range(start, end, freq="MS").strftime("%b")
+    month_positions = pd.date_range(start, end, freq="MS").map(lambda x: (x - start).days // 7)
+    # Create custom colormap with specific boundaries
+    from matplotlib.colors import ListedColormap, BoundaryNorm
+    colors = ['#ebedf0', '#9be9a8', '#40c463', '#30a14e', '#216e39']  # GitHub-style green colors
+    bounds = [0, 1, 3, 11, 31, float('inf')]  # Boundaries for color transitions
+    cmap = ListedColormap(colors)
+    norm = BoundaryNorm(bounds, cmap.N)
+    # Create plot more efficiently
+    fig, ax = plt.subplots(figsize=(12, 1.2))
+    # Convert pivot values to integers to ensure proper color mapping
+    pivot_int = pivot.astype(int)
+    # Create heatmap with explicit vmin and vmax
+    sns.heatmap(pivot_int, ax=ax, cmap=cmap, norm=norm, linewidths=0.5, linecolor="white",
+                square=True, cbar=False, yticklabels=["M", "T", "W", "T", "F", "S", "S"])
+    ax.set_title(f"{title}", fontsize=12, pad=10)
+    ax.set_xlabel("")
+    ax.set_ylabel("")
+    ax.set_xticks(month_positions)
+    ax.set_xticklabels(month_labels, fontsize=8)
+    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
+    st.pyplot(fig)
+# Sidebar
+with st.sidebar:
+    st.title("👤 Contributor")
+    username = st.selectbox(
+        "Select or type a username",
+        options=["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"],
+        index=0
+    )
+    st.markdown("<div style='text-align: center; margin: 10px 0;'>OR</div>", unsafe_allow_html=True)
+    custom = st.text_input("", placeholder="Enter custom username/org")
+    if custom.strip():
+        username = custom.strip()
+    year_options = list(range(datetime.now().year, 2017, -1))
+    selected_year = st.selectbox("🗓️ Year", options=year_options)
+# Main Content
+st.title("🤗 Hugging Face Contributions")
+if username:
+    with st.spinner("Fetching commit data..."):
+        # Create a dictionary to store commits by type
+        commits_by_type = {}
+        commit_counts_by_type = {}
+        # Fetch commits for each type separately
+        for kind in ["model", "dataset", "space"]:
+            try:
+                items = cached_list_items(username, kind)
+                repo_ids = [item.id for item in items]
+                # Process repos in chunks
+                chunk_size = 5
+                total_commits = 0
+                all_commit_dates = []
+                for i in range(0, len(repo_ids), chunk_size):
+                    chunk = repo_ids[i:i + chunk_size]
+                    with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
+                        future_to_repo = {
+                            executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year): repo_id
+                            for repo_id in chunk
+                        }
+                        for future in as_completed(future_to_repo):
+                            repo_commits, repo_count = future.result()
+                            if repo_commits:
+                                all_commit_dates.extend(repo_commits)
+                                total_commits += repo_count
+                commits_by_type[kind] = all_commit_dates
+                commit_counts_by_type[kind] = total_commits
+            except Exception as e:
+                st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
+                commits_by_type[kind] = []
+                commit_counts_by_type[kind] = 0
+        # Calculate total commits across all types
+        total_commits = sum(commit_counts_by_type.values())
+        st.subheader(f"{username}'s Activity in {selected_year}")
+        st.metric("Total Commits", total_commits)
+        # Create DataFrame for all commits
+        all_commits = []
+        for commits in commits_by_type.values():
+            all_commits.extend(commits)
+        all_df = pd.DataFrame(all_commits, columns=["date"])
+        if not all_df.empty:
+            all_df = all_df.drop_duplicates()  # Remove any duplicate dates
+        make_calendar_heatmap(all_df, "All Commits", selected_year)
+        # Metrics and heatmaps for each type
+        col1, col2, col3 = st.columns(3)
+        for col, kind, emoji, label in [
+            (col1, "model", "🧠", "Models"),
+            (col2, "dataset", "📦", "Datasets"),
+            (col3, "space", "🚀", "Spaces")
+        ]:
+            with col:
+                try:
+                    total = len(cached_list_items(username, kind))
+                    commits = commits_by_type.get(kind, [])
+                    commit_count = commit_counts_by_type.get(kind, 0)
+                    df_kind = pd.DataFrame(commits, columns=["date"])
+                    if not df_kind.empty:
+                        df_kind = df_kind.drop_duplicates()  # Remove any duplicate dates
+                    st.metric(f"{emoji} {label}", total)
+                    st.metric(f"Commits in {selected_year}", commit_count)
+                    make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)
+                except Exception as e:
+                    st.warning(f"Error processing {label}: {str(e)}")
+                    st.metric(f"{emoji} {label}", 0)
+                    st.metric(f"Commits in {selected_year}", 0)
+                    make_calendar_heatmap(pd.DataFrame(), f"{label} Commits", selected_year)