File size: 2,687 Bytes
1a3c9e6
 
 
 
af674e3
 
 
 
 
1a3c9e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af674e3
 
 
1a3c9e6
 
 
 
 
 
 
 
 
 
 
af674e3
 
1a3c9e6
 
af674e3
1a3c9e6
af674e3
1a3c9e6
 
af674e3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from datasets import load_dataset
from huggingface_hub import ModelCard
from huggingface_hub import HfApi

import gradio as gr
import pandas as pd

# Hub client, used below to enumerate the dataset repository's commit history.
api = HfApi()

# Dataset repository that get_data() loads model cards from.
repo_id = "librarian-bots/model_cards_with_metadata"

# NOTE: the dataset is deliberately not loaded at module level. get_data()
# loads and filters it itself for whichever revision is requested, so an extra
# load here only repeated that work on every start-up.

# Map each commit date ("MM/DD/YYYY") to a commit id so that a dataset snapshot
# can be selected by date. Several commits made on the same date collapse into
# a single entry: the one list_repo_commits yields last for that date.
list_commits = api.list_repo_commits(repo_id, repo_type="dataset")
commits_date_dict = {
    commit.created_at.strftime("%m/%d/%Y"): commit.commit_id
    for commit in list_commits
}

# Label of the snapshot displayed when the app starts; it is interpolated into
# the initial plot title.
current_date = "latest"

def get_data(commit_date="latest"):
    """Count transformers models and those whose card lacks ``library_name``.

    Args:
        commit_date: ``"latest"`` to load the dataset without pinning a
            revision, or a key of ``commits_date_dict`` (a ``"MM/DD/YYYY"``
            string) to load the dataset at the commit recorded for that date.

    Returns:
        A two-row ``pandas.DataFrame`` with columns ``name`` and ``count``:
        the number of rows whose ``library_name`` column is ``transformers``,
        and how many of those have a card whose parsed metadata has no
        ``library_name``.

    Raises:
        KeyError: If ``commit_date`` is neither ``"latest"`` nor a key of
            ``commits_date_dict``.
    """
    ds_kwargs = {}
    if commit_date != "latest":
        # Pin the load to the commit recorded for the requested date.
        ds_kwargs["revision"] = commits_date_dict[commit_date]

    dataset = load_dataset(repo_id, split='train', **ds_kwargs)
    dataset = dataset.filter(lambda x: x['library_name'] == 'transformers')

    def library_name_not_in_card(card):
        """Return True when the card parses and its metadata has no library_name."""
        try:
            return ModelCard(card).data.library_name is None
        except Exception:
            # Best effort: a card that cannot be parsed is not counted as
            # missing the field, so one bad card never aborts the whole map.
            return False

    ds = dataset.map(
        lambda x: {"missing_library_name": library_name_not_in_card(x['card'])},
        num_proc=4,
    )

    data = pd.DataFrame(
        {
            "name": ["Total Number of transformers Model", "Total number of models with missing 'library_name: transformers' in model card."],
            # Summing the boolean column counts the rows flagged True.
            "count": [len(ds), sum(ds["missing_library_name"])],
        }
    )
    return data

def fetch_fn(commit_date="latest"):
    """Build the bar plot for the dataset snapshot selected in the dropdown.

    Args:
        commit_date: ``"latest"`` or a date key accepted by ``get_data``.

    Returns:
        A ``gr.BarPlot`` of the counts returned by ``get_data``. The title
        names the selected date, matching the title of the initial plot.
    """
    data = get_data(commit_date=commit_date)
    return gr.BarPlot(
        data,
        x="name",
        y="count",
        # Keep the date in the title so the refreshed plot says which snapshot
        # it shows, just like the plot rendered at start-up.
        title=f"Count of Model cards with the correct library_name tag at the date {commit_date}",
        height=256,
        width=1024,
        tooltip=["name", "count"],
        vertical=False
    )

# Counts for the latest snapshot; shown until a date is picked in the dropdown.
data = get_data()

with gr.Blocks() as bar_plot:
    with gr.Column():
        with gr.Row():
            plot = gr.BarPlot(
                data,
                x="name",
                y="count",
                title=f"Count of Model cards with the correct library_name tag at the date {current_date}",
                height=256,
                width=1024,
                tooltip=["name", "count"],
                vertical=False
            )
        with gr.Column():
            display = gr.Dropdown(
                # "latest" is the default value, so it must also be a choice;
                # otherwise the default is not a valid option and the user
                # cannot return to it after picking a date.
                choices=["latest", *commits_date_dict],
                value="latest",
                label="Dataset snapshot date",
            )

        # Redraw the plot whenever a different snapshot date is selected.
        display.change(fetch_fn, inputs=display, outputs=plot)

bar_plot.launch()