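# NOTE(review): the original header read "Spaces: Runtime error" — presumably
# status text captured from the hosting page rather than part of the script.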
# A simple script that loops over all public models and gets their library_name
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
from collections import Counter | |
from huggingface_hub import HfApi | |
from datasets import load_dataset | |
# Hub API client used to enumerate models.
api = HfApi()
# `list_models()` returns a generator, so this object is exhausted after a
# single pass over it.
list_models = api.list_models()
def fetch_dataset_and_init(library_names=None):
    """Build the per-library model counts in linear and log scale.

    Args:
        library_names: Optional iterable of ``library_name`` values, one entry
            per model, with ``None`` marking a model that declares no library.
            When omitted, the values are read from the ``library_name`` column
            of the ``librarian-bots/model_cards_with_metadata`` dataset
            (``train`` split).

    Returns:
        A ``(df, df_log)`` tuple of DataFrames, each with the columns
        ``library_name`` and ``count`` and sorted by descending count.
        ``df_log`` holds the natural logarithm of the counts in ``df``.
    """
    if library_names is None:
        dataset = load_dataset(
            "librarian-bots/model_cards_with_metadata", split="train"
        )
        library_names = dataset["library_name"]

    # Relabel missing names before counting instead of running a frame-wide
    # `replace` afterwards: only the name column is touched and the result
    # does not depend on how pandas treats `None` inside `replace`.
    counts = Counter(
        "No library_name" if name is None else name for name in library_names
    )

    # `most_common()` already yields (name, count) pairs by descending count.
    df = pd.DataFrame(counts.most_common(), columns=["library_name", "count"])
    # Keep an integer dtype even when there are no rows at all.
    df["count"] = df["count"].astype("int64")

    df_log = df.copy()
    df_log["count"] = np.log(df_log["count"])
    return df, df_log
# Computed once at import time; the callbacks below read these frames.
df, df_log = fetch_dataset_and_init()
def get_current_nb_models():
    """Return how many Hub models are not accounted for in ``df``.

    The value is the number of models currently listed by the Hub minus the
    sum of the ``count`` column of ``df``.

    Returns:
        The difference, formatted as a string for display in a text box.
    """
    # `list_models()` returns a generator, so it has to be counted by
    # iterating. Ask for a fresh one on every call: a generator created once
    # at module level is exhausted after the first click, which would make
    # every later click count 0 models and report a negative number.
    total_models = sum(1 for _ in api.list_models())
    diff_models = total_models - df["count"].sum()
    return str(diff_models)
# Dimensions passed to every `gr.BarPlot` built by `bar_plot_fn`.
plot_height = 512
plot_width = 1512
# Plain-Python initial values. Both names are rebound to Gradio components
# when the UI is built further down in this file.
select_box = ["all"]
top_k = len(df)
def bar_plot_fn(display, top_k, select_box=None):
    """Build the bar plot of model counts per ``library_name``.

    Args:
        display: ``"simple"`` to plot raw counts (``df``) or ``"log"`` to plot
            log-scaled counts (``df_log``).
        top_k: Number of leading rows to show when no specific library is
            selected. Ignored when ``select_box`` names specific libraries.
        select_box: Library names to restrict the plot to. ``None``, an empty
            selection, or exactly ``["all"]`` means "no restriction".

    Returns:
        A ``gr.BarPlot`` for the chosen rows, or ``None`` when ``display`` is
        neither ``"simple"`` nor ``"log"``.
    """
    if display == "simple":
        source_df = df
    elif display == "log":
        source_df = df_log
    else:
        # Unknown mode: nothing to draw.
        return None

    # An empty selection (the user cleared the dropdown) is treated like
    # "all" rather than filtering everything away and drawing an empty plot.
    if select_box and select_box != ["all"]:
        current_df = source_df[source_df["library_name"].isin(select_box)]
    else:
        # Slider values may arrive as floats, which cannot be used as a
        # positional slice bound.
        current_df = source_df[: int(top_k)]

    return gr.BarPlot(
        current_df,
        x="library_name",
        y="count",
        tooltip=["library_name", "count"],
        height=plot_height,
        width=plot_width,
    )
# UI definition: plot-type and top-K controls, the plot itself, a button that
# counts models missing from the dataset, and a library multi-select.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened — confirm it matches the intended layout.
with gr.Blocks() as bar_plot:
    with gr.Column():
        with gr.Column():
            display = gr.Dropdown(
                choices=[
                    "simple",
                    "log",
                ],
                value="simple",
                label="Type of Bar Plot",
            )
            top_k = gr.Slider(
                label="Select top-K most used library_name (This leads to a no-op if you selected something else than 'all' in the columns below)",
                value=len(df),
                minimum=1,
                maximum=len(df),
                step=1,
            )
        with gr.Column():
            plot = gr.BarPlot()
        with gr.Row():
            fetch_button = gr.Button(value="Fetch current number of models without model cards (takes up to 1min to fetch everything)")
            text_box = gr.Textbox(value="", label="Number of models without model cards")
        with gr.Column():
            select_box = gr.Dropdown(
                ["all"] + df["library_name"].tolist(), value=["all"], multiselect=True, label="Libraries to inspect", info="Select specific libraries to inspect"
            )

        # Any change to a control redraws the plot from the current state of
        # all three controls.
        top_k.change(bar_plot_fn, inputs=[display, top_k, select_box], outputs=plot)
        display.change(bar_plot_fn, inputs=[display, top_k, select_box], outputs=plot)
        select_box.change(bar_plot_fn, inputs=[display, top_k, select_box], outputs=plot)
        fetch_button.click(get_current_nb_models, outputs=[text_box])

        # Initial render. `bar_plot_fn` takes three arguments, so all three
        # controls are passed, matching the `.change` handlers above.
        bar_plot.load(fn=bar_plot_fn, inputs=[display, top_k, select_box], outputs=plot)

bar_plot.launch()