awacke1 committed on
Commit
94380fb
·
verified ·
1 Parent(s): ffa9aed

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import base64
4
+ import os
5
+ import asyncio
6
+ from huggingface_hub import HfApi
7
+ import plotly.express as px
8
+
9
# Initialize the Hugging Face API client. A single module-level instance is
# used by fetch_user_content for its list_models / list_datasets calls.
api = HfApi()
11
+
12
# Directory to save the downloaded and generated files.
HTML_DIR = "generated_html_pages"
# exist_ok=True makes the creation idempotent and removes the gap between
# "does it exist?" and "create it" that a separate os.path.exists check has
# when the script is executed more than once at the same time.
os.makedirs(HTML_DIR, exist_ok=True)
16
+
17
# Default list of Hugging Face usernames used to pre-fill the text area.
default_users = {
    "users": [
        "awacke1",
        "rogerxavier",
        "jonatasgrosman",
        "kenshinn",
        "Csplk",
        "DavidVivancos",
        "cdminix",
        "Jaward",
        "TuringsSolutions",
        "Severian",
        "Wauplin",
        "phosseini",
        "Malikeh1375",
        "gokaygokay",
        "MoritzLaurer",
        "mrm8488",
        "TheBloke",
        "lhoestq",
        "xw-eric",
        "Paul",
        "Muennighoff",
        "ccdv",
        "haonan-li",
        "chansung",
        "lukaemon",
        "hails",
        "pharmapsychotic",
        "KingNish",
        "merve",
        "ameerazam08",
        "ashleykleynhans",
    ],
}
28
+
29
# Asynchronous function to fetch user content using Hugging Face API
async def fetch_user_content(username):
    """Fetch the models and datasets published by one Hugging Face user.

    Args:
        username: Account name passed as ``author`` to ``api.list_models``
            and ``api.list_datasets``.

    Returns:
        ``{"username", "models", "datasets"}`` (both values are lists) on
        success, or ``{"username", "error"}`` carrying the exception text
        when anything fails. Exceptions are never propagated to the caller.
    """

    def _collect(list_fn):
        # In current huggingface_hub releases the listing methods return
        # lazy iterables that request pages while being consumed. Building
        # the list here, inside the worker thread, keeps that network I/O
        # off the event loop; wrapping only the call itself in to_thread
        # would leave the actual fetching to happen on the loop thread.
        return list(list_fn(author=username))

    try:
        # The two listings are independent, so run them side by side.
        models, datasets = await asyncio.gather(
            asyncio.to_thread(_collect, api.list_models),
            asyncio.to_thread(_collect, api.list_datasets),
        )
    except Exception as e:
        return {"username": username, "error": str(e)}

    return {
        "username": username,
        "models": models,
        "datasets": datasets,
    }
42
+
43
# Fetch all users concurrently
async def fetch_all_users(usernames):
    """Run ``fetch_user_content`` for every name at once.

    Returns the list produced by ``asyncio.gather``: one result per entry
    of *usernames*, in the same order.
    """
    return await asyncio.gather(
        *(fetch_user_content(name) for name in usernames)
    )
47
+
48
# Function to download the user page using requests
def download_user_page(username, timeout=10):
    """Download ``https://huggingface.co/<username>`` into ``HTML_DIR``.

    Args:
        username: Account name; also used as the stem of the saved file.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely.

    Returns:
        ``(html_file_path, None)`` on success, or ``(None, error_message)``
        when the name is rejected or the download/write fails.
    """
    # The name comes straight from user input and ends up in a filesystem
    # path, so refuse anything that could escape HTML_DIR.
    if (
        not username
        or username in (".", "..")
        or "/" in username
        or "\\" in username
    ):
        return None, f"Invalid username: {username!r}"

    url = f"https://huggingface.co/{username}"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        html_content = response.text
        html_file_path = os.path.join(HTML_DIR, f"{username}.html")
        with open(html_file_path, "w", encoding='utf-8') as html_file:
            html_file.write(html_content)
        return html_file_path, None
    except Exception as e:
        return None, str(e)
61
+
62
# Function to base64 encode the HTML file
def encode_html_to_base64(html_file_path):
    """Return the base64 text of the file at *html_file_path*.

    The file is read as raw bytes. The result is ``(encoded_text, None)``
    on success and ``(None, error_message)`` if any exception is raised.
    """
    try:
        with open(html_file_path, "rb") as source:
            raw_bytes = source.read()
        return base64.b64encode(raw_bytes).decode('utf-8'), None
    except Exception as exc:
        return None, str(exc)
71
+
72
# Cache the downloaded and encoded content to avoid redundant operations
@st.cache_data(show_spinner=False, ttl=3600)
def get_cached_base64_html(username):
    """Download a user's profile page and return it base64-encoded.

    Chains ``download_user_page`` and ``encode_html_to_base64`` and returns
    ``(encoded_text, None)`` on success or ``(None, error_message)`` from
    whichever step failed first.

    NOTE(review): error tuples are ordinary return values, so a failed
    download is presumably cached for the TTL as well - confirm that this
    is intended.
    """
    page_path, download_error = download_user_page(username)
    if download_error:
        return None, download_error

    encoded_text, encoding_error = encode_html_to_base64(page_path)
    return (None, encoding_error) if encoding_error else (encoded_text, None)
82
+
83
# Streamlit app setup
st.title("Hugging Face User Page Downloader 📄✨")

# Text area with default list of usernames
user_input = st.text_area(
    "Enter Hugging Face usernames (one per line):",
    value="\n".join(default_users["users"]),
    height=300,
)

# Show User Content button
# NOTE(review): clicking one of the download buttons below reruns the script,
# at which point st.button presumably reports False again and this whole
# section disappears - consider remembering the "shown" state in
# st.session_state.
if st.button("Show User Content"):
    # Drop blank lines and repeated names (keeping first-seen order). A
    # repeated name would render two identical download buttons, which
    # Streamlit rejects as duplicate widgets.
    username_list = list(
        dict.fromkeys(
            line.strip() for line in user_input.split('\n') if line.strip()
        )
    )

    # Test the cleaned list rather than the raw text so that whitespace-only
    # input reaches the warning instead of rendering an empty overview.
    if username_list:
        # Collect statistics for Plotly graphs
        stats = {"username": [], "models_count": [], "datasets_count": []}

        # Fetch every user in one event loop so the Hub requests overlap
        # instead of running strictly one after another.
        with st.spinner("Fetching user content..."):
            all_user_data = asyncio.run(fetch_all_users(username_list))

        st.markdown("### User Content Overview")
        # asyncio.gather preserves input order, so results line up with names.
        for username, user_data in zip(username_list, all_user_data):
            with st.container():
                # Profile link
                st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")

                if "error" in user_data:
                    st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
                else:
                    models = user_data["models"]
                    datasets = user_data["datasets"]

                    # Encode the downloaded HTML page to base64
                    base64_html, encode_error = get_cached_base64_html(username)
                    if base64_html:
                        # Provide a download link for the base64-encoded HTML
                        b64_filename = f"{username}_base64.txt"
                        st.download_button(
                            label=f"📥 Download {username}'s Base64 Encoded HTML",
                            data=base64_html,
                            file_name=b64_filename,
                            mime="text/plain",
                        )
                    else:
                        st.error(f"Failed to encode HTML for {username}: {encode_error}")

                    # Add to statistics
                    stats["username"].append(username)
                    stats["models_count"].append(len(models))
                    stats["datasets_count"].append(len(datasets))

                    # Display models
                    with st.expander(f"🧠 Models ({len(models)})", expanded=False):
                        if models:
                            for model in models:
                                model_name = model.modelId.split("/")[-1]
                                st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
                        else:
                            st.markdown("No models found. 🤷‍♂️")

                    # Display datasets
                    with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
                        if datasets:
                            for dataset in datasets:
                                dataset_name = dataset.id.split("/")[-1]
                                st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
                        else:
                            st.markdown("No datasets found. 🤷‍♀️")

                st.markdown("---")

        # Plotly graphs to visualize the number of models and datasets each user has
        if stats["username"]:
            st.markdown("### User Content Statistics")

            # Number of models per user
            fig_models = px.bar(
                x=stats["username"],
                y=stats["models_count"],
                labels={'x': 'Username', 'y': 'Number of Models'},
                title="Number of Models per User",
            )
            st.plotly_chart(fig_models)

            # Number of datasets per user
            fig_datasets = px.bar(
                x=stats["username"],
                y=stats["datasets_count"],
                labels={'x': 'Username', 'y': 'Number of Datasets'},
                title="Number of Datasets per User",
            )
            st.plotly_chart(fig_datasets)

    else:
        st.warning("Please enter at least one username. Don't be shy! 😅")

# Sidebar instructions
st.sidebar.markdown("""
## How to use:
1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
2. Click **'Show User Content'**.
3. View each user's models and datasets along with a link to their Hugging Face profile.
4. **Download a base64-encoded HTML page** for each user by clicking the download button.
5. Check out the statistics visualizations below!
""")