陈俊杰 committed "table" · commit b781bf5 · 1 parent: f51ed55

app.py CHANGED
@@ -265,10 +265,13 @@ elif page == "LeaderBoard":
         "Spearman (Non-Factoid QA)": [],
     }
 
+    TeamId = ["baseline1", "baseline2", "baseline3", "baseline4"]
+    Methods = ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o"]
+
     # teamId unique identifier
     DG = {
-        "TeamId":
-        "Methods":
+        "TeamId": TeamId,
+        "Methods": Methods,
         "Accuracy": [0.5806, 0.5483, 0.6001, 0.6472],
         "Kendall's Tau": [0.3243, 0.1739, 0.3042, 0.4167],
         "Spearman": [0.3505, 0.1857, 0.3264, 0.4512]
@@ -276,8 +279,8 @@
     df1 = pd.DataFrame(DG)
 
     TE = {
-        "TeamId":
-        "Methods":
+        "TeamId": TeamId,
+        "Methods": Methods,
         "Accuracy": [0.5107, 0.5050, 0.5461, 0.5581],
         "Kendall's Tau": [0.1281, 0.0635, 0.2716, 0.3864],
         "Spearman": [0.1352, 0.0667, 0.2867, 0.4157]
@@ -285,8 +288,8 @@
     df2 = pd.DataFrame(TE)
 
     SG = {
-        "TeamId":
-        "Methods":
+        "TeamId": TeamId,
+        "Methods": Methods,
         "Accuracy": [0.6504, 0.6014, 0.7162, 0.7441],
         "Kendall's Tau": [0.3957, 0.2688, 0.5092, 0.5001],
         "Spearman": [0.4188, 0.2817, 0.5403, 0.5405],
@@ -294,18 +297,42 @@
     df3 = pd.DataFrame(SG)
 
     NFQA = {
-        "TeamId":
-        "Methods":
+        "TeamId": TeamId,
+        "Methods": Methods,
         "Accuracy": [0.5935, 0.5817, 0.7000, 0.7203],
         "Kendall's Tau": [0.2332, 0.2389, 0.4440, 0.4235],
         "Spearman": [0.2443, 0.2492, 0.4630, 0.4511]
     }
     df4 = pd.DataFrame(NFQA)
 
-
-
-
-
+    OverAll = {
+        "TeamId": TeamId,
+        "Methods": Methods,
+        "Accuracy": [],
+        "Kendall's Tau": [],
+        "Spearman": []
+    }
+
+    data = [DG, NFQA, SG, TE]
+    task = ["Dialogue Generation", "Non-Factoid QA", "Summary Generation", "Text Expansion"]
+    metric = ["Accuracy", "Kendall's Tau", "Spearman"]
+
+    for m in metric:
+        # for each metric
+        metric_score = [0] * len(TeamId)
+        for j in range(len(TeamId)):
+            # for each team
+            for d in data:
+                metric_score[j] += d[m][j]
+        metric_score = [k / len(task) for k in metric_score]
+        OverAll[m] = metric_score
+
+    dfo = pd.DataFrame(OverAll)
+
+    df = [df1, df2, df3, df4, dfo]
+    for d in df:
+        for col in d.select_dtypes(include=['float64', 'int64']).columns:
+            d[col] = d[col].apply(lambda x: f"{x:.4f}")
 
     # # create tabs
     # tab1, tab2, tab3, tab4 = st.tabs(["DG", "TE", "SG", "NFQA"])
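The block added in this hunk builds the new Overall table by macro-averaging each metric over the four task dictionaries, then formats every frame to four decimals. Below is a minimal standalone sketch of that averaging step so the Overall numbers can be checked by hand; the accuracy_by_task dict and macro_average helper are illustrative names of mine, not identifiers from app.py.

# Sketch (not part of app.py): the same macro-averaging the OverAll block does,
# shown standalone. Scores are the Accuracy rows copied from the four task dicts
# above; systems are chatglm3-6b, baichuan2-13b, chatglm-pro, gpt-4o in order.
accuracy_by_task = {
    "Dialogue Generation": [0.5806, 0.5483, 0.6001, 0.6472],
    "Non-Factoid QA":      [0.5935, 0.5817, 0.7000, 0.7203],
    "Summary Generation":  [0.6504, 0.6014, 0.7162, 0.7441],
    "Text Expansion":      [0.5107, 0.5050, 0.5461, 0.5581],
}

def macro_average(per_task_scores):
    """Average the j-th system's score over all tasks (hypothetical helper)."""
    tasks = list(per_task_scores.values())
    n_systems = len(tasks[0])
    return [sum(t[j] for t in tasks) / len(tasks) for j in range(n_systems)]

print(macro_average(accuracy_by_task))
# approximately [0.5838, 0.5591, 0.6406, 0.6674]: the Accuracy column the
# Overall table should show, matching OverAll["Accuracy"] from the loop above.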
@@ -325,30 +352,45 @@
     # with tab4:
     #     st.markdown("""<p class='main-text'>Task: Non-Factoid QA; Dataset: NF_CATS</p>""", unsafe_allow_html=True)
     #     st.dataframe(df4, use_container_width=True)
-
-    data = [DG, NFQA, SG, TE]
-    task = ["Dialogue Generation", "Non-Factoid QA", "Summary Generation", "Text Expansion"]
-    metric = ["Accuracy", "Kendall's Tau", "Spearman"]
 
-    [14 removed lines not shown in this view: the old leaderboard-assembly code, preserved commented-out below]
+    st.markdown("""<p class='main-text'>Overall</p>""", unsafe_allow_html=True)
+    st.dataframe(dfo, use_container_width=True)
+
+    st.markdown("""<p class='main-text'>Task: Dialogue Generation; Dataset: DialyDialog</p>""", unsafe_allow_html=True)
+    st.dataframe(df1, use_container_width=True)
+
+    st.markdown("""<p class='main-text'>Task: Text Expansion; Dataset: WritingPrompts</p>""", unsafe_allow_html=True)
+    st.dataframe(df2, use_container_width=True)
+
+    st.markdown("""<p class='main-text'>Task: Summary Generation; Dataset: Xsum</p>""", unsafe_allow_html=True)
+    st.dataframe(df3, use_container_width=True)
+
+    st.markdown("""<p class='main-text'>Task: Non-Factoid QA; Dataset: NF_CATS</p>""", unsafe_allow_html=True)
+    st.dataframe(df4, use_container_width=True)
+
+    # data = [DG, NFQA, SG, TE]
+    # task = ["Dialogue Generation", "Non-Factoid QA", "Summary Generation", "Text Expansion"]
+    # metric = ["Accuracy", "Kendall's Tau", "Spearman"]
+
+    # overall_total = [0] * len(df["TeamId"])
+    # for i, d in enumerate(data): # each dataset
+    #     total = [0] * len(df["TeamId"]) # length initialized to the number of methods
+    #     for j in range(len(metric)): # each metric
+    #         index = f"{metric[j]} ({task[i]})"
+    #         df[index] = d[metric[j]]
+    #         for k in range(len(df["TeamId"])):
+    #             total[k] += d[metric[j]][k]
+    #     average_index = f"Average ({task[i]})"
+    #     df[average_index] = [k / len(metric) for k in total]
+    #     for k in range(len(df["TeamId"])):
+    #         overall_total[k] += df[average_index][k]
+
+    # df["Average (all 4 datatsets)"] = [k / len(task) for k in overall_total]
 
-    df = pd.DataFrame(df)
-    for col in df.select_dtypes(include=['float64', 'int64']).columns:
-        df[col] = df[col].apply(lambda x: f"{x:.4f}")
-    st.dataframe(df,use_container_width=True)
+    # df = pd.DataFrame(df)
+    # for col in df.select_dtypes(include=['float64', 'int64']).columns:
+    #     df[col] = df[col].apply(lambda x: f"{x:.4f}")
+    # st.dataframe(df,use_container_width=True)
 
     st.markdown("""
 🔗 To register for AEOLLM task, you can visit the following link and choose our AEOLLM task: [https://research.nii.ac.jp/ntcir/ntcir-18/howto.html](https://research.nii.ac.jp/ntcir/ntcir-18/howto.html).
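A note on the formatting pass in the earlier hunk: after d[col] = d[col].apply(lambda x: f"{x:.4f}") the metric columns hold strings, so the tables rendered above show four decimals but would sort lexicographically if a viewer clicks a column header. If numeric sorting matters, one alternative (my suggestion, not something this commit does) is to keep the frames numeric and format only at display time through a pandas Styler, which st.dataframe accepts. Sketch for df1:

# Sketch (assumption, not from this commit): format at display time instead of
# casting the metric columns to strings, so the underlying values stay floats.
numeric_cols = df1.select_dtypes(include=["float64", "int64"]).columns
st.dataframe(
    df1.style.format("{:.4f}", subset=list(numeric_cols)),  # 4-decimal display only
    use_container_width=True,
)

The same call pattern would apply to df2, df3, df4, and dfo.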