File size: 7,765 Bytes
3d7abcf
 
 
d071597
 
3d7abcf
d071597
8685a73
d071597
b838740
d071597
 
 
 
 
 
3d7abcf
 
8685a73
 
3d7abcf
b838740
e04bd55
3d7abcf
e04bd55
3d7abcf
 
e04bd55
3d7abcf
 
e04bd55
 
 
 
 
b3439f4
 
e04bd55
 
 
 
b3439f4
8685a73
b3439f4
8685a73
b3439f4
 
 
 
 
 
 
8685a73
3d7abcf
b3439f4
 
e04bd55
 
 
 
 
b3439f4
e04bd55
b3439f4
e04bd55
 
 
 
 
b3439f4
e04bd55
 
 
 
b3439f4
 
e04bd55
 
 
 
 
 
 
 
 
 
 
b3439f4
3d7abcf
 
 
 
 
 
 
 
 
 
 
 
 
 
e04bd55
d071597
e04bd55
d071597
 
 
b3439f4
 
e04bd55
 
 
 
b3439f4
e04bd55
d071597
e04bd55
 
d071597
b3439f4
3d7abcf
 
e04bd55
3d7abcf
d071597
 
3d7abcf
e04bd55
 
3d7abcf
 
 
 
 
d071597
 
 
3d7abcf
 
 
 
 
e04bd55
b3439f4
e04bd55
3d7abcf
 
 
 
 
b3439f4
 
 
3d7abcf
 
 
 
 
8685a73
 
 
3d7abcf
 
 
 
b3439f4
d071597
3d7abcf
b838740
e04bd55
 
 
 
 
 
 
 
b3439f4
e04bd55
 
b3439f4
e04bd55
 
 
 
 
 
b3439f4
 
 
 
 
 
 
8685a73
b3439f4
 
e04bd55
 
b3439f4
8685a73
5ca1599
 
8685a73
 
 
 
 
e04bd55
 
b3439f4
3d7abcf
b3439f4
3d7abcf
d071597
8685a73
 
3d7abcf
 
d071597
8685a73
d071597
 
3d7abcf
8685a73
0c55f72
8685a73
 
 
3d7abcf
8685a73
d071597
3d7abcf
8685a73
3d7abcf
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load
import requests
import re
import pandas as pd
from huggingface_hub import ModelCard
import os 


def pass_emoji(passed):
    """
    Map a pass/fail flag to the emoji shown in the progress table
    :param passed: flag of the unit; only the literal ``True`` counts as a pass
    :return: a check mark emoji if ``passed is True``, otherwise a cross mark emoji
    """
    # Unicode escapes instead of literal emoji: the literals had been corrupted
    # by a wrong-codec round trip, and escapes cannot be mangled that way.
    # The identity check is deliberate and unchanged: truthy non-bool values
    # (1, "yes", ...) are still reported as a fail.
    return "\u2705" if passed is True else "\u274c"

# Dataset repo from which the Unit 7 check downloads usernames.csv.
USERNAMES_DATASET_ID = "huggingface-course/audio-course-u7-hands-on"
# Token passed to that download; None when HF_TOKEN is not set in the environment.
HF_TOKEN = os.getenv("HF_TOKEN")
# Hub client shared by the model listing helpers.
api = HfApi()


def get_user_models(hf_username, task):
    """
    List the user's models for a given task
    :param hf_username: User HF username
    :param task: Hub task tag; supported values are "audio-classification",
                 "automatic-speech-recognition" and "text-to-speech"
    :return: list of model ids. For the two tasks tied to a course dataset,
             only models whose card metadata lists exactly that dataset are
             kept. An unsupported task yields an empty list.
    """
    # Dataset a model of each task must declare ("" means no dataset requirement).
    required_datasets = {
        "audio-classification": 'marsyas/gtzan',
        "automatic-speech-recognition": 'PolyAI/minds14',
        "text-to-speech": "",
    }

    # Validate before touching the Hub: an unsupported task used to print the
    # message and then crash with UnboundLocalError on `dataset`.
    if task not in required_datasets:
        print("Unsupported task")
        return []
    dataset = required_datasets[task]

    models = api.list_models(author=hf_username, filter=[task])
    user_model_ids = [x.modelId for x in models]

    if dataset == "":
        return user_model_ids

    dataset_specific_models = []
    for model in user_model_ids:
        meta = get_metadata(model)
        if meta is None:
            continue
        try:
            if meta["datasets"] == [dataset]:
                dataset_specific_models.append(model)
        except (KeyError, TypeError):
            # Card metadata without a usable "datasets" entry: not a match.
            continue
    return dataset_specific_models

def calculate_best_result(user_models, task):
    """
    Calculate the best results of a unit for a given task
    :param user_models: model ids of a user
    :param task: "audio-classification" (a larger metric is better) or
                 "automatic-speech-recognition" (a smaller metric is better)
    :return: tuple (best_result, best_model). When no model yields a metric,
             the starting sentinel (-100 or 100) and "" are returned.
    :raises ValueError: if no metric is defined for the task
    """
    if task == "audio-classification":
        best_result = -100
        larger_is_better = True
    elif task == "automatic-speech-recognition":
        best_result = 100
        larger_is_better = False
    else:
        # Used to surface as an UnboundLocalError further down.
        raise ValueError(f"No metric defined for task: {task!r}")

    best_model = ""

    for model in user_models:
        meta = get_metadata(model)
        if meta is None:
            continue
        metric = parse_metrics(model, task)
        if metric is None:
            # No metric in this card. Comparing None would raise TypeError and
            # discard the results of every other model, so skip it instead.
            continue

        if larger_is_better:
            improved = metric > best_result
        else:
            improved = metric < best_result
        if not improved:
            continue

        best_result = metric
        try:
            best_model = meta['model-index'][0]["name"]
        except (KeyError, IndexError, TypeError):
            # Metadata without a model-index name: report the repo id instead
            # of failing the whole evaluation.
            best_model = model

    return best_result, best_model


def get_metadata(model_id):
    """
    Fetch the README.md of a model repo and return its parsed metadata
    :param model_id: id of the model repo
    :return: whatever metadata_load yields for the README, or None when the
             download raises an HTTP error
    """
    try:
        card_metadata = metadata_load(hf_hub_download(model_id, filename="README.md"))
    except requests.exceptions.HTTPError:
        # e.g. 404: the repo has no README.md
        card_metadata = None
    return card_metadata


def extract_metric(model_card_content, task):
    """
    Extract the metric value from the models' model card
    :param model_card_content: model card content (text)
    :param task: "audio-classification" (reads "Accuracy: <float>") or
                 "automatic-speech-recognition" (reads "Wer: <float>")
    :return: the first matching value as a float, or None when the card has no
             such value, the content is empty, or the task has no metric
    """
    # One pattern per task; each captures a decimal number with a fractional part.
    patterns = {
        "audio-classification": r"Accuracy: (\d+\.\d+)",
        "automatic-speech-recognition": r"Wer: (\d+\.\d+)",
    }

    pattern = patterns.get(task)
    # An unsupported task used to crash with UnboundLocalError on `pattern`;
    # "no metric" is already expressed as None, so report it the same way.
    if pattern is None or not model_card_content:
        return None

    found = re.search(pattern, model_card_content)
    if found is None:
        return None
    return float(found.group(1))


def parse_metrics(model, task):
    """
    Load a model's card and pull the task metric out of its text
    :param model: model id
    :param task: task whose metric should be extracted
    :return: the result of extract_metric on the card content
    """
    model_card = ModelCard.load(model)
    card_text = model_card.content
    return extract_metric(card_text, task)


def certification(hf_username):
  results_certification = [
      {
          "unit": "Unit 4: Audio Classification",
          "task": "audio-classification",
          "baseline_metric": 0.87,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False
      },
  {
          "unit": "Unit 5: Automatic Speech Recognition",
          "task": "automatic-speech-recognition",
          "baseline_metric": 0.37,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False
  },
  {
          "unit": "Unit 6: Text-to-Speech",
          "task": "text-to-speech",
          "baseline_metric": 0,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False
  },
  {
          "unit": "Unit 7: Audio applications",
          "task": "demo",
          "baseline_metric": 0,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False
  },
  ]

  for unit in results_certification:
    unit["passed"] = pass_emoji(unit["passed_"])

    match unit["task"]:
      case "audio-classification":
        try:
          user_ac_models = get_user_models(hf_username, task = "audio-classification")
          best_result, best_model_id = calculate_best_result(user_ac_models, task = "audio-classification")
          unit["best_result"] = best_result
          unit["best_model_id"] = best_model_id
          if unit["best_result"] >= unit["baseline_metric"]:
            unit["passed_"] = True
            unit["passed"] = pass_emoji(unit["passed_"])
        except: print("Either no relevant models found, or no metrics in the model card for audio classificaiton")
      case "automatic-speech-recognition":
        try:
          user_asr_models = get_user_models(hf_username, task = "automatic-speech-recognition")
          best_result, best_model_id = calculate_best_result(user_asr_models, task = "automatic-speech-recognition")
          unit["best_result"] = best_result
          unit["best_model_id"] = best_model_id
          if unit["best_result"] <= unit["baseline_metric"]:
            unit["passed_"] = True
            unit["passed"] = pass_emoji(unit["passed_"])
        except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
      case "text-to-speech":
        try:
          user_tts_models = get_user_models(hf_username, task = "text-to-speech")
          if user_tts_models:
            unit["best_result"] = 0
            unit["best_model_id"] = user_tts_models[0]
            unit["passed_"] = True
            unit["passed"] = pass_emoji(unit["passed_"])
        except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
      case "demo":
        u7_usernames = hf_hub_download(USERNAMES_DATASET_ID, repo_type = "dataset", filename="usernames.csv", token=HF_TOKEN)
        u7_users = pd.read_csv(u7_usernames)
        if hf_username in u7_users['username']:
            unit["best_result"] = 0
            unit["best_model_id"] = "Demo check passed, no model id"
            unit["passed_"] = True
            unit["passed"] = pass_emoji(unit["passed_"])
      case _:
        print("Unknown task")

  print(results_certification)

  df = pd.DataFrame(results_certification)
  df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
  return df    

# UI: a username box, a button, and the progress table filled by certification().
with gr.Blocks() as demo:
    # The trophy emoji is written as an escape: the literal glyph had been
    # corrupted by a wrong-codec round trip. No placeholders are used, so the
    # string does not need to be an f-string.
    gr.Markdown("""
    # \U0001F3C6 Check your progress in the Audio Course \U0001F3C6

    - To get a certificate of completion, you must **pass 3 out of 4 assignments before July 31st 2023**.
    - To get an honors certificate, you must **pass 4 out of 4 assignments before July 31st 2023**.

    For the assignments where you have to train a model, your model's metric should be equal to or better than the baseline metric.
    For the Unit 7 assignment, first, check your demo with the [Unit 7 assessment space](https://huggingface.co/spaces/huggingface-course/audio-course-u7-assessment)

    Make sure that you have uploaded your model(s) to Hub, and that your Unit 7 demo is public.
    To check your progress, type your Hugging Face Username here (in my case MariaK)
    """)

    hf_username = gr.Textbox(placeholder="MariaK", label="Your Hugging Face Username")
    check_progress_button = gr.Button(value="Check my progress")
    # NOTE(review): at build time `hf_username` is the Textbox component, not a
    # string, so this call only seeds the initial table; the real lookup
    # happens in the click handler below.
    output = gr.components.Dataframe(value=certification(hf_username))
    check_progress_button.click(fn=certification, inputs=hf_username, outputs=output)

demo.launch()