Daniel van Strien
committed on
Commit
·
39cd921
1
Parent(s):
8dd872c
add autogenerated tab
Browse files
app.py
CHANGED
@@ -14,7 +14,7 @@ pd.options.plotting.backend = "plotly"
|
|
14 |
|
15 |
|
16 |
def download_dataset():
|
17 |
-
return load_dataset("open-source-metrics/model-repos-stats", split="train")
|
18 |
|
19 |
|
20 |
def _clean_tags(tags: Optional[Union[str, List[str]]]):
|
@@ -30,6 +30,16 @@ def _clean_tags(tags: Optional[Union[str, List[str]]]):
|
|
30 |
return []
|
31 |
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
def prep_dataset():
|
34 |
ds = download_dataset()
|
35 |
df = ds.to_pandas()
|
@@ -39,8 +49,10 @@ def prep_dataset():
|
|
39 |
df["has_languages"] = df.languages.apply(len) > 0
|
40 |
df["has_tags"] = df.tags.apply(len) > 0
|
41 |
df["has_dataset"] = df.datasets.apply(len) > 0
|
42 |
-
df["has_co2"] = df.co2.
|
43 |
df["has_co2"] = df.co2.apply(lambda x: x is not None)
|
|
|
|
|
44 |
df = df.drop(columns=["Unnamed: 0"])
|
45 |
df.to_parquet("data.parquet")
|
46 |
return df
|
@@ -148,6 +160,14 @@ def metadata_coverage_by_library(metadata_field):
|
|
148 |
return df.groupby("library")[metadata_field].mean().sort_values().plot.barh()
|
149 |
|
150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
df = load_data()
|
152 |
top_n = df.library.value_counts().shape[0]
|
153 |
libraries = [library for library in df.library.unique() if library]
|
@@ -200,6 +220,12 @@ with gr.Blocks() as demo:
|
|
200 |
metadata_field.change(
|
201 |
metadata_coverage_by_library, [metadata_field], plot, queue=False
|
202 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
with gr.Tab("Model Cards"):
|
205 |
gr.Markdown(
|
|
|
14 |
|
15 |
|
16 |
def download_dataset():
|
17 |
+
return load_dataset("open-source-metrics/model-repos-stats", split="train", ignore_verifications=True)
|
18 |
|
19 |
|
20 |
def _clean_tags(tags: Optional[Union[str, List[str]]]):
|
|
|
30 |
return []
|
31 |
|
32 |
|
33 |
+
def _is_generated_from_tag(tags):
|
34 |
+
return any("generated" in tag for tag in tags)
|
35 |
+
|
36 |
+
|
37 |
+
def _parse_tags_for_generated(tags):
|
38 |
+
for tag in tags:
|
39 |
+
if "generated" in tag:
|
40 |
+
return tag
|
41 |
+
|
42 |
+
|
43 |
def prep_dataset():
|
44 |
ds = download_dataset()
|
45 |
df = ds.to_pandas()
|
|
|
49 |
df["has_languages"] = df.languages.apply(len) > 0
|
50 |
df["has_tags"] = df.tags.apply(len) > 0
|
51 |
df["has_dataset"] = df.datasets.apply(len) > 0
|
52 |
+
df["has_co2"] = df.co2.notnull()
|
53 |
df["has_co2"] = df.co2.apply(lambda x: x is not None)
|
54 |
+
df['has_license'] = df.license.notnull()
|
55 |
+
df['is_generated'] = df.tags.apply(_is_generated_from_tag)
|
56 |
df = df.drop(columns=["Unnamed: 0"])
|
57 |
df.to_parquet("data.parquet")
|
58 |
return df
|
|
|
160 |
return df.groupby("library")[metadata_field].mean().sort_values().plot.barh()
|
161 |
|
162 |
|
163 |
+
def metadata_coverage_by_autogenerated(metadata_field):
|
164 |
+
df = load_data()
|
165 |
+
subset_df = df[df['is_generated']].copy(deep=True)
|
166 |
+
subset_df.reset_index()
|
167 |
+
subset_df['autogenerated-from'] = subset_df.tags.apply(_parse_tags_for_generated)
|
168 |
+
return subset_df.groupby("autogenerated-from")[metadata_field].mean().sort_values().plot.barh()
|
169 |
+
|
170 |
+
|
171 |
df = load_data()
|
172 |
top_n = df.library.value_counts().shape[0]
|
173 |
libraries = [library for library in df.library.unique() if library]
|
|
|
220 |
metadata_field.change(
|
221 |
metadata_coverage_by_library, [metadata_field], plot, queue=False
|
222 |
)
|
223 |
+
with gr.Tab("Auto generated model cards"):
|
224 |
+
metadata_field = gr.Dropdown(choices=metadata_coverage_columns)
|
225 |
+
plot = gr.Plot()
|
226 |
+
metadata_field.change(
|
227 |
+
metadata_coverage_by_autogenerated, [metadata_field], plot, queue=False
|
228 |
+
)
|
229 |
|
230 |
with gr.Tab("Model Cards"):
|
231 |
gr.Markdown(
|