David Pomerenke committed on
Commit
d91b022
·
1 Parent(s): c1db7ba

Language selection checkboxes & filtering in backend

Browse files
evals/backend.py CHANGED
@@ -3,11 +3,12 @@ import json
3
  import numpy as np
4
  import pandas as pd
5
  import uvicorn
6
- from fastapi import FastAPI
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.middleware.gzip import GZipMiddleware
9
  from fastapi.responses import JSONResponse
10
  from fastapi.staticfiles import StaticFiles
 
11
  from languages import languages
12
  from models import models
13
  from tables import aggregate, make_country_table, make_language_table, make_model_table
@@ -18,7 +19,7 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"])
18
  app.add_middleware(GZipMiddleware, minimum_size=1000)
19
 
20
  with open("results.json", "r") as f:
21
- results = json.load(f)
22
 
23
 
24
  def serialize(df):
@@ -26,8 +27,17 @@ def serialize(df):
26
 
27
 
28
  @app.post("/api/data")
29
- def data():
30
- _, lang_results, model_results, task_results = aggregate(results)
 
 
 
 
 
 
 
 
 
31
  model_table = make_model_table(model_results, models)
32
  language_table = make_language_table(lang_results, languages)
33
  datasets_df = pd.read_json("data/datasets.json")
 
3
  import numpy as np
4
  import pandas as pd
5
  import uvicorn
6
+ from fastapi import FastAPI, Request
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.middleware.gzip import GZipMiddleware
9
  from fastapi.responses import JSONResponse
10
  from fastapi.staticfiles import StaticFiles
11
+
12
  from languages import languages
13
  from models import models
14
  from tables import aggregate, make_country_table, make_language_table, make_model_table
 
19
  app.add_middleware(GZipMiddleware, minimum_size=1000)
20
 
21
  with open("results.json", "r") as f:
22
+ results = pd.DataFrame(json.load(f))
23
 
24
 
25
  def serialize(df):
 
27
 
28
 
29
  @app.post("/api/data")
30
+ async def data(request: Request):
31
+ body = await request.body()
32
+ data = json.loads(body)
33
+ selected_languages = data.get("selectedLanguages", {})
34
+ df = results
35
+ if selected_languages:
36
+ df = df[df["bcp_47"].isin(l["bcp_47"] for l in selected_languages)]
37
+
38
+ _, lang_results, model_results, task_results = aggregate(df)
39
+ print(lang_results)
40
+ # lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
41
  model_table = make_model_table(model_results, models)
42
  language_table = make_language_table(lang_results, languages)
43
  datasets_df = pd.read_json("data/datasets.json")
evals/main.py CHANGED
@@ -1,10 +1,8 @@
1
  import asyncio
2
  import json
3
 
4
- import numpy as np
5
  from languages import languages
6
  from models import model_fast, models
7
- from rich import print
8
  from tasks import tasks
9
  from tqdm.asyncio import tqdm_asyncio
10
 
@@ -39,6 +37,7 @@ async def evaluate():
39
 
40
  async def main():
41
  results = await evaluate()
 
42
  with open("results.json", "w") as f:
43
  json.dump(results, f, indent=2, ensure_ascii=False)
44
 
 
1
  import asyncio
2
  import json
3
 
 
4
  from languages import languages
5
  from models import model_fast, models
 
6
  from tasks import tasks
7
  from tqdm.asyncio import tqdm_asyncio
8
 
 
37
 
38
  async def main():
39
  results = await evaluate()
40
+ results = [r for group in results for r in group]
41
  with open("results.json", "w") as f:
42
  json.dump(results, f, indent=2, ensure_ascii=False)
43
 
evals/models.py CHANGED
@@ -96,6 +96,7 @@ models = pd.DataFrame(models, columns=["id"])
96
 
97
  api = HfApi()
98
 
 
99
  def get_metadata(id):
100
  try:
101
  info = api.model_info(id)
 
96
 
97
  api = HfApi()
98
 
99
+ @cache
100
  def get_metadata(id):
101
  try:
102
  info = api.model_info(id)
evals/tables.py CHANGED
@@ -5,7 +5,6 @@ make_country_table = make_country_table
5
 
6
 
7
  def aggregate(results):
8
- results = pd.DataFrame([r for rs in results for r in rs])
9
  results = (
10
  results.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
11
  )
@@ -14,7 +13,6 @@ def aggregate(results):
14
  .agg({"score": "mean", "model": "nunique"})
15
  .reset_index()
16
  )
17
- # lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
18
  model_results = (
19
  results.groupby(["model", "task", "metric"])
20
  .agg({"score": "mean", "bcp_47": "nunique"})
@@ -32,14 +30,6 @@ def mean(lst):
32
  return sum(lst) / len(lst) if lst else None
33
 
34
 
35
- def fmt_name(s):
36
- return (
37
- " ".join(w.capitalize() for w in s.split("-"))
38
- .replace("Gpt", "GPT")
39
- .replace("ai", "AI")
40
- )
41
-
42
-
43
  def make_model_table(df, models):
44
  df["task_metric"] = df["task"] + "_" + df["metric"]
45
  df = df.drop(columns=["task", "metric"])
@@ -51,13 +41,10 @@ def make_model_table(df, models):
51
  df[row] = df[row].round(2)
52
  df = pd.merge(df, models, left_on="model", right_on="id", how="left")
53
  df["creation_date"] = df["creation_date"].dt.strftime("%Y-%m-%d")
54
- df["provider"] = df["model"].str.split("/").str[0].apply(fmt_name)
55
- df["model"] = df["model"].str.split("/").str[1].apply(fmt_name)
56
  df["rank"] = df.index + 1
57
  df = df[
58
  [
59
  "rank",
60
- "provider",
61
  "model",
62
  "hf_id",
63
  "creation_date",
 
5
 
6
 
7
  def aggregate(results):
 
8
  results = (
9
  results.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
10
  )
 
13
  .agg({"score": "mean", "model": "nunique"})
14
  .reset_index()
15
  )
 
16
  model_results = (
17
  results.groupby(["model", "task", "metric"])
18
  .agg({"score": "mean", "bcp_47": "nunique"})
 
30
  return sum(lst) / len(lst) if lst else None
31
 
32
 
 
 
 
 
 
 
 
 
33
  def make_model_table(df, models):
34
  df["task_metric"] = df["task"] + "_" + df["metric"]
35
  df = df.drop(columns=["task", "metric"])
 
41
  df[row] = df[row].round(2)
42
  df = pd.merge(df, models, left_on="model", right_on="id", how="left")
43
  df["creation_date"] = df["creation_date"].dt.strftime("%Y-%m-%d")
 
 
44
  df["rank"] = df.index + 1
45
  df = df[
46
  [
47
  "rank",
 
48
  "model",
49
  "hf_id",
50
  "creation_date",
frontend/src/App.js CHANGED
@@ -15,20 +15,11 @@ function App () {
15
  const [loading, setLoading] = useState(true)
16
  const [error, setError] = useState(null)
17
  const [allSuggestions, setAllSuggestions] = useState([])
18
- const [languageFilters, setLanguageFilters] = useState({
19
- language_name: { value: null, matchMode: FilterMatchMode.EQUALS }, // via global search
20
- family: { value: null, matchMode: FilterMatchMode.IN },
21
- speakers: { value: null, matchMode: FilterMatchMode.BETWEEN }
22
- })
23
- const [modelFilters, setModelFilters] = useState({
24
- model: { value: null, matchMode: FilterMatchMode.ENDS_WITH }, // via global search
25
- type: { value: null, matchMode: FilterMatchMode.IN },
26
- size: { value: null, matchMode: FilterMatchMode.BETWEEN }
27
- })
28
  useEffect(() => {
29
  fetch('/api/data', {
30
  method: 'POST',
31
- // body: JSON.stringify({ modelFilters, languageFilters }),
32
  })
33
  .then(response => {
34
  if (!response.ok) {
@@ -44,7 +35,7 @@ function App () {
44
  setError(err.message)
45
  setLoading(false)
46
  })
47
- }, [modelFilters, languageFilters])
48
 
49
  useEffect(() => {
50
  if (data) {
@@ -52,14 +43,15 @@ function App () {
52
  type: 'Model',
53
  value: item.model,
54
  detail: item.provider,
55
- searchText: item.provider.toLowerCase() + ' ' + item.model.toLowerCase()
56
  }))
57
  const languages = data.language_table.map(item => ({
58
  type: 'Language',
59
  value: item.autonym,
60
  detail: item.language_name,
61
  searchText:
62
- item.language_name.toLowerCase() + ' ' + item.autonym.toLowerCase()
 
63
  }))
64
  const datasets = data.dataset_table.map(item => ({
65
  type: 'Dataset',
@@ -107,25 +99,10 @@ function App () {
107
  <AutoComplete
108
  allSuggestions={allSuggestions}
109
  onComplete={item => {
110
- const languageValue =
111
- item.type === 'Language' ? item.detail : null
112
- const modelValue = item.type === 'Model' ? item.value : null
113
- setLanguageFilters(prevFilters => ({
114
- ...prevFilters,
115
- language_name: {
116
- value: languageValue,
117
- matchMode: FilterMatchMode.EQUALS
118
- }
119
- }))
120
- setModelFilters(prevFilters => ({
121
- ...prevFilters,
122
- model: {
123
- value: modelValue,
124
- matchMode: FilterMatchMode.ENDS_WITH
125
- }
126
- }))
127
  }}
128
  />
 
129
  </header>
130
  <main
131
  style={{
@@ -152,11 +129,7 @@ function App () {
152
  maxWidth: 'min(100vw, 800px)'
153
  }}
154
  >
155
- <ModelTable
156
- data={data.model_table}
157
- filters={modelFilters}
158
- setFilters={setModelFilters}
159
- />
160
  </div>
161
  <div
162
  style={{
@@ -166,8 +139,8 @@ function App () {
166
  >
167
  <LanguageTable
168
  data={data.language_table}
169
- filters={languageFilters}
170
- setFilters={setLanguageFilters}
171
  />
172
  </div>
173
  <div
 
15
  const [loading, setLoading] = useState(true)
16
  const [error, setError] = useState(null)
17
  const [allSuggestions, setAllSuggestions] = useState([])
18
+ const [selectedLanguages, setSelectedLanguages] = useState([])
 
 
 
 
 
 
 
 
 
19
  useEffect(() => {
20
  fetch('/api/data', {
21
  method: 'POST',
22
+ body: JSON.stringify({ selectedLanguages })
23
  })
24
  .then(response => {
25
  if (!response.ok) {
 
35
  setError(err.message)
36
  setLoading(false)
37
  })
38
+ }, [selectedLanguages])
39
 
40
  useEffect(() => {
41
  if (data) {
 
43
  type: 'Model',
44
  value: item.model,
45
  detail: item.provider,
46
+ searchText: item.model.toLowerCase()
47
  }))
48
  const languages = data.language_table.map(item => ({
49
  type: 'Language',
50
  value: item.autonym,
51
  detail: item.language_name,
52
  searchText:
53
+ item.language_name.toLowerCase() + ' ' + item.autonym.toLowerCase(),
54
+ bcp_47: item.bcp_47
55
  }))
56
  const datasets = data.dataset_table.map(item => ({
57
  type: 'Dataset',
 
99
  <AutoComplete
100
  allSuggestions={allSuggestions}
101
  onComplete={item => {
102
+ if (item.type === 'Language') setSelectedLanguages(() => [item])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }}
104
  />
105
+ <pre>{JSON.stringify(selectedLanguages, null, 2)}</pre>
106
  </header>
107
  <main
108
  style={{
 
129
  maxWidth: 'min(100vw, 800px)'
130
  }}
131
  >
132
+ <ModelTable data={data.model_table} />
 
 
 
 
133
  </div>
134
  <div
135
  style={{
 
139
  >
140
  <LanguageTable
141
  data={data.language_table}
142
+ selectedLanguages={selectedLanguages}
143
+ setSelectedLanguages={setSelectedLanguages}
144
  />
145
  </div>
146
  <div
frontend/src/components/AutoComplete.js CHANGED
@@ -5,7 +5,6 @@ const AutoComplete = ({ allSuggestions, onComplete }) => {
5
  const [suggestions, setSuggestions] = useState([])
6
 
7
  const search = e => {
8
- console.log(allSuggestions)
9
  const matches = allSuggestions.filter(suggestion =>
10
  suggestion.searchText.includes(e.query.toLowerCase())
11
  )
 
5
  const [suggestions, setSuggestions] = useState([])
6
 
7
  const search = e => {
 
8
  const matches = allSuggestions.filter(suggestion =>
9
  suggestion.searchText.includes(e.query.toLowerCase())
10
  )
frontend/src/components/LanguageTable.js CHANGED
@@ -6,7 +6,13 @@ import { useState, useEffect } from 'react'
6
  import { Slider } from 'primereact/slider'
7
  import ScoreField from './ScoreField'
8
 
9
- const LanguageTable = ({ data, filters, setFilters }) => {
 
 
 
 
 
 
10
  const families = [...new Set(data.map(item => item.family))].slice(0, 10)
11
  families.push("Other")
12
  const familyRowFilterTemplate = options => {
@@ -124,10 +130,15 @@ const LanguageTable = ({ data, filters, setFilters }) => {
124
  removableSort
125
  filters={filters}
126
  filterDisplay='menu'
 
 
 
 
127
  scrollable
128
  scrollHeight='600px'
129
  id='language-table'
130
  >
 
131
  <Column
132
  field='language_name'
133
  header='Language'
 
6
  import { Slider } from 'primereact/slider'
7
  import ScoreField from './ScoreField'
8
 
9
+ const LanguageTable = ({ data, selectedLanguages, setSelectedLanguages }) => {
10
+ const [filters, setFilters] = useState({
11
+ language_name: { value: null, matchMode: FilterMatchMode.EQUALS }, // via global search
12
+ family: { value: null, matchMode: FilterMatchMode.IN },
13
+ speakers: { value: null, matchMode: FilterMatchMode.BETWEEN }
14
+ })
15
+
16
  const families = [...new Set(data.map(item => item.family))].slice(0, 10)
17
  families.push("Other")
18
  const familyRowFilterTemplate = options => {
 
130
  removableSort
131
  filters={filters}
132
  filterDisplay='menu'
133
+ selectionMode='checkbox'
134
+ selection={selectedLanguages}
135
+ onSelectionChange={e => setSelectedLanguages(e.value)}
136
+ frozenValue={selectedLanguages}
137
  scrollable
138
  scrollHeight='600px'
139
  id='language-table'
140
  >
141
+ <Column selectionMode="multiple" headerStyle={{ width: '3rem' }} />
142
  <Column
143
  field='language_name'
144
  header='Language'
frontend/src/components/ModelTable.js CHANGED
@@ -7,7 +7,11 @@ import Medal from './Medal'
7
  import { Slider } from 'primereact/slider'
8
  import ScoreField from './ScoreField'
9
 
10
- const ModelTable = ({ data, filters, setFilters }) => {
 
 
 
 
11
  const rankBodyTemplate = rowData => {
12
  return <Medal rank={rowData.rank} />
13
  }
@@ -99,9 +103,17 @@ const ModelTable = ({ data, filters, setFilters }) => {
99
  return <div style={{ textAlign: 'center' }}>{sizeStr}</div>
100
  }
101
 
 
 
 
 
 
 
 
102
  const modelBodyTemplate = rowData => {
 
103
  return (
104
- <div style={{ fontWeight: 'bold', height: '100%' }}>{rowData.model}</div>
105
  )
106
  }
107
 
@@ -135,7 +147,7 @@ const ModelTable = ({ data, filters, setFilters }) => {
135
  id='model-table'
136
  >
137
  <Column field='rank' body={rankBodyTemplate} />
138
- <Column field='provider' header='Provider' style={{ minWidth: '5rem' }} />
139
  <Column
140
  field='model'
141
  header='Model'
 
7
  import { Slider } from 'primereact/slider'
8
  import ScoreField from './ScoreField'
9
 
10
+ const ModelTable = ({ data }) => {
11
+ const [filters, setFilters] = useState({
12
+ type: { value: null, matchMode: FilterMatchMode.IN },
13
+ size: { value: null, matchMode: FilterMatchMode.BETWEEN }
14
+ })
15
  const rankBodyTemplate = rowData => {
16
  return <Medal rank={rowData.rank} />
17
  }
 
103
  return <div style={{ textAlign: 'center' }}>{sizeStr}</div>
104
  }
105
 
106
+ const capitalize = s => String(s).charAt(0).toUpperCase() + String(s).slice(1)
107
+
108
+ const providerBodyTemplate = rowData => {
109
+ const providerName = rowData.model.split('/')[0].split('-').map(capitalize).join(' ')
110
+ return providerName
111
+ }
112
+
113
  const modelBodyTemplate = rowData => {
114
+ const modelName = rowData.model.split('/')[1].split('-').map(capitalize).join(' ')
115
  return (
116
+ <div style={{ fontWeight: 'bold', height: '100%' }}>{modelName}</div>
117
  )
118
  }
119
 
 
147
  id='model-table'
148
  >
149
  <Column field='rank' body={rankBodyTemplate} />
150
+ <Column field='provider' header='Provider' style={{ minWidth: '5rem' }} body={providerBodyTemplate} />
151
  <Column
152
  field='model'
153
  header='Model'
results.json CHANGED
The diff for this file is too large to render. See raw diff