davidpomerenke committed on
Commit
bc4afa0
·
verified ·
1 Parent(s): c9e9db6

Upload from GitHub Actions: Try moving `cache` calls that cause CI issues

Browse files
Files changed (2) hide show
  1. evals/models.py +5 -4
  2. evals/tasks.py +6 -11
evals/models.py CHANGED
@@ -93,9 +93,10 @@ def get_current_popular_models(date: date):
93
  return [get_model(model["model_permaslug"]) for model in data]
94
 
95
 
96
- popular_models = get_historical_popular_models(
97
- date.today()
98
- ) + get_current_popular_models(date.today())
 
99
  popular_models = [get_model(m) for m in popular_models if get_model(m)]
100
  popular_models = [
101
  m for m in popular_models if m["endpoint"] and not m["endpoint"]["is_free"]
@@ -104,7 +105,7 @@ popular_models = [m["slug"] for m in popular_models]
104
  popular_models = [
105
  m for m in popular_models if m and m not in models and m not in blocklist
106
  ]
107
- models += popular_models[:5]
108
 
109
  load_dotenv()
110
  client = AsyncOpenAI(
 
93
  return [get_model(model["model_permaslug"]) for model in data]
94
 
95
 
96
+ popular_models = (
97
+ get_historical_popular_models(date.today())[:5]
98
+ + get_current_popular_models(date.today())[:5]
99
+ )
100
  popular_models = [get_model(m) for m in popular_models if get_model(m)]
101
  popular_models = [
102
  m for m in popular_models if m["endpoint"] and not m["endpoint"]["is_free"]
 
105
  popular_models = [
106
  m for m in popular_models if m and m not in models and m not in blocklist
107
  ]
108
+ models += popular_models
109
 
110
  load_dotenv()
111
  client = AsyncOpenAI(
evals/tasks.py CHANGED
@@ -24,7 +24,6 @@ target_languages = languages[languages["in_benchmark"]].sample(
24
  )
25
 
26
 
27
- @cache
28
  async def translate_and_evaluate(model, bcp_47, sentence_nr, mode="from"):
29
  original_language = languages[languages["bcp_47"] == bcp_47].iloc[0]
30
  target_language = target_languages.iloc[sentence_nr]
@@ -78,7 +77,6 @@ async def translate_and_evaluate(model, bcp_47, sentence_nr, mode="from"):
78
  # metadata = pd.read_csv("data/floresp-v2.0-rc.3/metadata_dev.tsv", sep="\t")
79
 
80
 
81
- @cache
82
  async def classify_and_evaluate(model, bcp_47, nr):
83
  language = languages[languages["bcp_47"] == bcp_47].iloc[0]
84
  sentences = flores_sentences(language)
@@ -161,7 +159,6 @@ def corrupt_sentence(sentence):
161
  return sentence[:start] + "<mask>" + sentence[end:]
162
 
163
 
164
- @cache
165
  async def mlm_and_evaluate(model, language_bcp_47, nr):
166
  language = languages[languages["bcp_47"] == language_bcp_47].iloc[0]
167
  sentences = flores_sentences(language)
@@ -206,7 +203,6 @@ async def mlm_and_evaluate(model, language_bcp_47, nr):
206
  ]
207
 
208
 
209
- @cache
210
  async def mmlu_and_evaluate(model, language_bcp_47, nr):
211
  ds_name, examples, task = load_mmlu(language_bcp_47, nr)
212
  if not task:
@@ -254,7 +250,6 @@ async def mmlu_and_evaluate(model, language_bcp_47, nr):
254
  ]
255
 
256
 
257
- @cache
258
  async def transcribe_and_evaluate(model, language_bcp_47, nr):
259
  language = languages[languages["bcp_47"] == language_bcp_47].iloc[0]
260
  fleurs = pd.read_csv(
@@ -287,10 +282,10 @@ async def transcribe_and_evaluate(model, language_bcp_47, nr):
287
 
288
 
289
  tasks = {
290
- "translation_from": partial(translate_and_evaluate, mode="from"),
291
- "translation_to": partial(translate_and_evaluate, mode="to"),
292
- # "classification": classify_and_evaluate,
293
- # "mlm": mlm_and_evaluate,
294
- "mmlu": mmlu_and_evaluate,
295
- # "asr": transcribe_and_evaluate,
296
  }
 
24
  )
25
 
26
 
 
27
  async def translate_and_evaluate(model, bcp_47, sentence_nr, mode="from"):
28
  original_language = languages[languages["bcp_47"] == bcp_47].iloc[0]
29
  target_language = target_languages.iloc[sentence_nr]
 
77
  # metadata = pd.read_csv("data/floresp-v2.0-rc.3/metadata_dev.tsv", sep="\t")
78
 
79
 
 
80
  async def classify_and_evaluate(model, bcp_47, nr):
81
  language = languages[languages["bcp_47"] == bcp_47].iloc[0]
82
  sentences = flores_sentences(language)
 
159
  return sentence[:start] + "<mask>" + sentence[end:]
160
 
161
 
 
162
  async def mlm_and_evaluate(model, language_bcp_47, nr):
163
  language = languages[languages["bcp_47"] == language_bcp_47].iloc[0]
164
  sentences = flores_sentences(language)
 
203
  ]
204
 
205
 
 
206
  async def mmlu_and_evaluate(model, language_bcp_47, nr):
207
  ds_name, examples, task = load_mmlu(language_bcp_47, nr)
208
  if not task:
 
250
  ]
251
 
252
 
 
253
  async def transcribe_and_evaluate(model, language_bcp_47, nr):
254
  language = languages[languages["bcp_47"] == language_bcp_47].iloc[0]
255
  fleurs = pd.read_csv(
 
282
 
283
 
284
  tasks = {
285
+ "translation_from": cache(partial(translate_and_evaluate, mode="from")),
286
+ "translation_to": cache(partial(translate_and_evaluate, mode="to")),
287
+ # "classification": cache(classify_and_evaluate),
288
+ # "mlm": cache(mlm_and_evaluate),
289
+ "mmlu": cache(mmlu_and_evaluate),
290
+ # "asr": cache(transcribe_and_evaluate),
291
  }