Daniel van Strien committed on
Commit
39cd921
·
1 Parent(s): 8dd872c

add autogenerated tab

Browse files
Files changed (1) hide show
  1. app.py +28 -2
app.py CHANGED
@@ -14,7 +14,7 @@ pd.options.plotting.backend = "plotly"
14
 
15
 
16
  def download_dataset():
17
- return load_dataset("open-source-metrics/model-repos-stats", split="train")
18
 
19
 
20
  def _clean_tags(tags: Optional[Union[str, List[str]]]):
@@ -30,6 +30,16 @@ def _clean_tags(tags: Optional[Union[str, List[str]]]):
30
  return []
31
 
32
 
 
 
 
 
 
 
 
 
 
 
33
  def prep_dataset():
34
  ds = download_dataset()
35
  df = ds.to_pandas()
@@ -39,8 +49,10 @@ def prep_dataset():
39
  df["has_languages"] = df.languages.apply(len) > 0
40
  df["has_tags"] = df.tags.apply(len) > 0
41
  df["has_dataset"] = df.datasets.apply(len) > 0
42
- df["has_co2"] = df.co2.isnull()
43
  df["has_co2"] = df.co2.apply(lambda x: x is not None)
 
 
44
  df = df.drop(columns=["Unnamed: 0"])
45
  df.to_parquet("data.parquet")
46
  return df
@@ -148,6 +160,14 @@ def metadata_coverage_by_library(metadata_field):
148
  return df.groupby("library")[metadata_field].mean().sort_values().plot.barh()
149
 
150
 
 
 
 
 
 
 
 
 
151
  df = load_data()
152
  top_n = df.library.value_counts().shape[0]
153
  libraries = [library for library in df.library.unique() if library]
@@ -200,6 +220,12 @@ with gr.Blocks() as demo:
200
  metadata_field.change(
201
  metadata_coverage_by_library, [metadata_field], plot, queue=False
202
  )
 
 
 
 
 
 
203
 
204
  with gr.Tab("Model Cards"):
205
  gr.Markdown(
 
14
 
15
 
16
  def download_dataset():
17
+ return load_dataset("open-source-metrics/model-repos-stats", split="train", ignore_verifications=True)
18
 
19
 
20
  def _clean_tags(tags: Optional[Union[str, List[str]]]):
 
30
  return []
31
 
32
 
33
+ def _is_generated_from_tag(tags):
34
+ return any("generated" in tag for tag in tags)
35
+
36
+
37
+ def _parse_tags_for_generated(tags):
38
+ for tag in tags:
39
+ if "generated" in tag:
40
+ return tag
41
+
42
+
43
  def prep_dataset():
44
  ds = download_dataset()
45
  df = ds.to_pandas()
 
49
  df["has_languages"] = df.languages.apply(len) > 0
50
  df["has_tags"] = df.tags.apply(len) > 0
51
  df["has_dataset"] = df.datasets.apply(len) > 0
52
+ df["has_co2"] = df.co2.notnull()
53
  df["has_co2"] = df.co2.apply(lambda x: x is not None)
54
+ df['has_license'] = df.license.notnull()
55
+ df['is_generated'] = df.tags.apply(_is_generated_from_tag)
56
  df = df.drop(columns=["Unnamed: 0"])
57
  df.to_parquet("data.parquet")
58
  return df
 
160
  return df.groupby("library")[metadata_field].mean().sort_values().plot.barh()
161
 
162
 
163
+ def metadata_coverage_by_autogenerated(metadata_field):
164
+ df = load_data()
165
+ subset_df = df[df['is_generated']].copy(deep=True)
166
+ subset_df.reset_index()
167
+ subset_df['autogenerated-from'] = subset_df.tags.apply(_parse_tags_for_generated)
168
+ return subset_df.groupby("autogenerated-from")[metadata_field].mean().sort_values().plot.barh()
169
+
170
+
171
  df = load_data()
172
  top_n = df.library.value_counts().shape[0]
173
  libraries = [library for library in df.library.unique() if library]
 
220
  metadata_field.change(
221
  metadata_coverage_by_library, [metadata_field], plot, queue=False
222
  )
223
+ with gr.Tab("Auto generated model cards"):
224
+ metadata_field = gr.Dropdown(choices=metadata_coverage_columns)
225
+ plot = gr.Plot()
226
+ metadata_field.change(
227
+ metadata_coverage_by_autogenerated, [metadata_field], plot, queue=False
228
+ )
229
 
230
  with gr.Tab("Model Cards"):
231
  gr.Markdown(