justinxzhao
committed on
Commit
·
e10e00e
1
Parent(s):
a146b18
Minor styling changes.
Browse files
app.py
CHANGED
@@ -76,29 +76,36 @@ judge_options = df_response_judging["llm_judge"].unique().tolist()
|
|
76 |
|
77 |
st.set_page_config(page_title="Language Model Council", page_icon="🏛️", layout="wide")
|
78 |
|
79 |
-
#
|
80 |
-
|
81 |
-
|
82 |
-
# Define CSS to make buttons take full space
|
83 |
-
full_width_button_css = """
|
84 |
<style>
|
85 |
-
|
86 |
-
|
87 |
}
|
88 |
</style>
|
89 |
"""
|
90 |
-
st.markdown(full_width_button_css, unsafe_allow_html=True)
|
91 |
|
92 |
-
|
93 |
-
|
94 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
-
# Place a button in each column
|
97 |
with col1:
|
98 |
st.link_button(
|
99 |
"Data",
|
100 |
"https://huggingface.co/datasets/llm-council/emotional_application",
|
101 |
use_container_width=True,
|
|
|
102 |
)
|
103 |
|
104 |
with col2:
|
@@ -106,6 +113,7 @@ with col2:
|
|
106 |
"Paper",
|
107 |
"https://openreview.net/forum?id=EgEMEYECXz",
|
108 |
use_container_width=True,
|
|
|
109 |
)
|
110 |
|
111 |
with col3:
|
@@ -113,34 +121,13 @@ with col3:
|
|
113 |
"Github",
|
114 |
"https://github.com/llm-council/llm-council",
|
115 |
use_container_width=True,
|
|
|
116 |
)
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
text-align: center;
|
123 |
-
}
|
124 |
-
</style>
|
125 |
-
"""
|
126 |
-
|
127 |
-
st.markdown(center_css, unsafe_allow_html=True)
|
128 |
-
|
129 |
-
# Centered icon.
|
130 |
-
# image = Image.open("img/lmc_icon.png")
|
131 |
-
# img_base64 = pil_to_base64(image)
|
132 |
-
# centered_image_html = f"""
|
133 |
-
# <div style="text-align: center;">
|
134 |
-
# <img src="data:image/png;base64,{img_base64}" width="50"/>
|
135 |
-
# </div>
|
136 |
-
# """
|
137 |
-
# st.markdown(centered_image_html, unsafe_allow_html=True)
|
138 |
-
|
139 |
-
# Title and subtitle.
|
140 |
-
st.title("Language Model Council")
|
141 |
-
st.markdown(
|
142 |
-
"###### Benchmarking Foundation Models on Highly Subjective Tasks by Consensus :classical_building:"
|
143 |
-
)
|
144 |
|
145 |
# Render hero image.
|
146 |
with open("img/hero.svg", "r") as file:
|
@@ -238,10 +225,13 @@ with tabs[1]:
|
|
238 |
# Add randomize button at the top of the app
|
239 |
_, mid_column, _ = st.columns([0.4, 0.2, 0.4])
|
240 |
mid_column.button(
|
241 |
-
":game_die: Randomize!",
|
|
|
|
|
|
|
242 |
)
|
243 |
|
244 |
-
st.markdown("
|
245 |
# Create the selectors
|
246 |
st.session_state.selected_scenario = st.selectbox(
|
247 |
"Select Scenario",
|
@@ -284,7 +274,7 @@ with tabs[1]:
|
|
284 |
|
285 |
st.divider()
|
286 |
|
287 |
-
st.markdown("
|
288 |
|
289 |
# Create two columns for model selectors
|
290 |
col1, col2 = st.columns(2)
|
@@ -344,10 +334,8 @@ with tabs[1]:
|
|
344 |
|
345 |
st.divider()
|
346 |
|
347 |
-
|
348 |
-
st.markdown("
|
349 |
-
|
350 |
-
st.markdown("#### All council members")
|
351 |
col1, col2 = st.columns(2)
|
352 |
|
353 |
with col1:
|
@@ -373,7 +361,7 @@ with tabs[1]:
|
|
373 |
st.bar_chart(pairwise_counts_right)
|
374 |
|
375 |
# Create the llm_judge selector
|
376 |
-
st.markdown("
|
377 |
st.session_state.selected_judge = st.selectbox(
|
378 |
"Select Judge",
|
379 |
judge_options,
|
@@ -459,7 +447,7 @@ with tabs[1]:
|
|
459 |
|
460 |
with tabs[2]:
|
461 |
st.markdown("### Battles (Respondent vs. Respondent)")
|
462 |
-
st.
|
463 |
image = Image.open("img/llm_vs_llm_win_rates.png")
|
464 |
img_base64 = pil_to_base64(image)
|
465 |
centered_image_html = f"""
|
@@ -469,9 +457,11 @@ with tabs[2]:
|
|
469 |
"""
|
470 |
st.markdown(centered_image_html, unsafe_allow_html=True)
|
471 |
|
|
|
|
|
472 |
st.markdown("### Affinities (Judge vs. Respondent)")
|
473 |
|
474 |
-
st.
|
475 |
image = Image.open("img/raw.png")
|
476 |
img_base64 = pil_to_base64(image)
|
477 |
centered_image_html = f"""
|
@@ -481,7 +471,12 @@ with tabs[2]:
|
|
481 |
"""
|
482 |
st.markdown(centered_image_html, unsafe_allow_html=True)
|
483 |
|
484 |
-
|
|
|
|
|
|
|
|
|
|
|
485 |
image = Image.open("img/council_normalized.png")
|
486 |
img_base64 = pil_to_base64(image)
|
487 |
centered_image_html = f"""
|
@@ -491,9 +486,11 @@ with tabs[2]:
|
|
491 |
"""
|
492 |
st.markdown(centered_image_html, unsafe_allow_html=True)
|
493 |
|
|
|
|
|
494 |
st.markdown("### Agreement (Judge vs. Judge)")
|
495 |
|
496 |
-
st.
|
497 |
image = Image.open("img/judge_agreement.sidewise_cohen_kappa.png")
|
498 |
img_base64 = pil_to_base64(image)
|
499 |
centered_image_html = f"""
|
|
|
76 |
|
77 |
st.set_page_config(page_title="Language Model Council", page_icon="🏛️", layout="wide")
|
78 |
|
79 |
+
# Custom CSS to center title and header
|
80 |
+
center_css = """
|
|
|
|
|
|
|
81 |
<style>
|
82 |
+
h1, h2, h3, h6{
|
83 |
+
text-align: center;
|
84 |
}
|
85 |
</style>
|
86 |
"""
|
|
|
87 |
|
88 |
+
st.markdown(center_css, unsafe_allow_html=True)
|
89 |
+
|
90 |
+
# Title and subtitle.
|
91 |
+
st.title("Language Model Council")
|
92 |
+
st.markdown(
|
93 |
+
"### Benchmarking Foundation Models on Highly Subjective Tasks by Consensus :classical_building:"
|
94 |
+
)
|
95 |
+
st.markdown(
|
96 |
+
"###### [Justin Zhao](https://www.justinxzhao.com/)¹, [Flor Miriam Plaza-del-Arco](https://fmplaza.github.io/)², [Amanda Cercas Curry](https://amandacurry.github.io/)²"
|
97 |
+
)
|
98 |
+
st.markdown("###### ¹ Predibase, ² Bocconi University")
|
99 |
+
|
100 |
+
# Create four button columns, centered between two empty spacer columns
|
101 |
+
_, col1, col2, col3, col4, _ = st.columns([0.3, 0.1, 0.1, 0.1, 0.1, 0.3])
|
102 |
|
|
|
103 |
with col1:
|
104 |
st.link_button(
|
105 |
"Data",
|
106 |
"https://huggingface.co/datasets/llm-council/emotional_application",
|
107 |
use_container_width=True,
|
108 |
+
type="primary",
|
109 |
)
|
110 |
|
111 |
with col2:
|
|
|
113 |
"Paper",
|
114 |
"https://openreview.net/forum?id=EgEMEYECXz",
|
115 |
use_container_width=True,
|
116 |
+
type="primary",
|
117 |
)
|
118 |
|
119 |
with col3:
|
|
|
121 |
"Github",
|
122 |
"https://github.com/llm-council/llm-council",
|
123 |
use_container_width=True,
|
124 |
+
type="primary",
|
125 |
)
|
126 |
|
127 |
+
with col4:
|
128 |
+
st.link_button(
|
129 |
+
"Website", "https://llm-council.com/", use_container_width=True, type="primary"
|
130 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
# Render hero image.
|
133 |
with open("img/hero.svg", "r") as file:
|
|
|
225 |
# Add randomize button at the top of the app
|
226 |
_, mid_column, _ = st.columns([0.4, 0.2, 0.4])
|
227 |
mid_column.button(
|
228 |
+
":game_die: Randomize!",
|
229 |
+
on_click=randomize_selection,
|
230 |
+
type="primary",
|
231 |
+
use_container_width=True,
|
232 |
)
|
233 |
|
234 |
+
st.markdown("#### 1. Select a scenario.")
|
235 |
# Create the selectors
|
236 |
st.session_state.selected_scenario = st.selectbox(
|
237 |
"Select Scenario",
|
|
|
274 |
|
275 |
st.divider()
|
276 |
|
277 |
+
st.markdown("#### 2. View responses.")
|
278 |
|
279 |
# Create two columns for model selectors
|
280 |
col1, col2 = st.columns(2)
|
|
|
334 |
|
335 |
st.divider()
|
336 |
|
337 |
+
st.markdown("#### 3. Response judging")
|
338 |
+
st.markdown("##### All council members")
|
|
|
|
|
339 |
col1, col2 = st.columns(2)
|
340 |
|
341 |
with col1:
|
|
|
361 |
st.bar_chart(pairwise_counts_right)
|
362 |
|
363 |
# Create the llm_judge selector
|
364 |
+
st.markdown("##### Individual LLM judges")
|
365 |
st.session_state.selected_judge = st.selectbox(
|
366 |
"Select Judge",
|
367 |
judge_options,
|
|
|
447 |
|
448 |
with tabs[2]:
|
449 |
st.markdown("### Battles (Respondent vs. Respondent)")
|
450 |
+
st.markdown("###### Expected win rates based on Terry-Bradley coefficients")
|
451 |
image = Image.open("img/llm_vs_llm_win_rates.png")
|
452 |
img_base64 = pil_to_base64(image)
|
453 |
centered_image_html = f"""
|
|
|
457 |
"""
|
458 |
st.markdown(centered_image_html, unsafe_allow_html=True)
|
459 |
|
460 |
+
st.divider()
|
461 |
+
|
462 |
st.markdown("### Affinities (Judge vs. Respondent)")
|
463 |
|
464 |
+
st.markdown("###### Raw affinities")
|
465 |
image = Image.open("img/raw.png")
|
466 |
img_base64 = pil_to_base64(image)
|
467 |
centered_image_html = f"""
|
|
|
471 |
"""
|
472 |
st.markdown(centered_image_html, unsafe_allow_html=True)
|
473 |
|
474 |
+
# Some extra space.
|
475 |
+
st.text("")
|
476 |
+
st.text("")
|
477 |
+
st.text("")
|
478 |
+
|
479 |
+
st.markdown("###### Council-Normalized")
|
480 |
image = Image.open("img/council_normalized.png")
|
481 |
img_base64 = pil_to_base64(image)
|
482 |
centered_image_html = f"""
|
|
|
486 |
"""
|
487 |
st.markdown(centered_image_html, unsafe_allow_html=True)
|
488 |
|
489 |
+
st.divider()
|
490 |
+
|
491 |
st.markdown("### Agreement (Judge vs. Judge)")
|
492 |
|
493 |
+
st.markdown("###### Sidewise Cohen's Kappa:")
|
494 |
image = Image.open("img/judge_agreement.sidewise_cohen_kappa.png")
|
495 |
img_base64 = pil_to_base64(image)
|
496 |
centered_image_html = f"""
|