# Streamlit page for the benchbench leaderboard.
#
# The only live behavior is rendering the page title. A draft of the full
# three-column layout (agree / disagree / configs) is kept below as
# commented-out code.
import streamlit as st
import pandas as pd  # referenced only by the disabled draft below

# The invisible characters and spaces at the start of the title string are
# kept exactly as found -- NOTE(review): presumably they pad the heading
# toward the center of the page; confirm before changing them.
st.title("‎‎‎ ‎‎ ‎ ‎ ‎ ‎ ‎ ‎🏋️‍♂️ benchbench-Leaderboard 🏋️‍♂️")

# NOTE(review): everything below is disabled. The reason is not visible in
# this file; the draft reads a CSV of per-benchmark z-scores, renames and
# filters rows, and shows them in three columns.
#
# df = pd.read_csv("BAT_w_arena_10_random.csv")
# df = (
#     (
#         df.rename(
#             columns={
#                 "z_score": "Z_Score",
#                 "benchmark": "Benchmark",
#             }
#         ).drop(
#             columns=[
#                 "Unnamed: 0",
#                 "z_test_pass",
#             ]
#         )
#     )
#     .sort_values("Z_Score", ascending=False)
#     .query(
#         'Benchmark!="Aggregate" and Benchmark!="MAGI" and Benchmark!="Alpaca(v2, len adj)" and Benchmark!="GPT4All"'
#     )
# )
# df.replace(
#     {
#         "Arena Elo": "LMSys Arena",
#         "Hugging-6": "HF OpenLLM",
#         "Alpaca(v2)": "Alpaca v2",
#         "Alpaca(v1)": "Alpaca v1",
#         "EQ-Bench(v2)": "EQ-Bench v2",
#     },
#     inplace=True,
# )
# col1, col2, col3 = st.columns(3)
# with col1:
#     st.header("‎ ‎ ‎ ‎ ‎ ‎ ‎ ‎ Agree")
#     st.dataframe(df.query("Z_Score>=0"), hide_index=True)
# with col2:
#     st.header("‎ ‎‎ ‎ Disagree")
#     st.dataframe(df.query("Z_Score<0").sort_values("Z_Score"), hide_index=True)
# with col3:
#     st.header("‎ ‎‎ ‎ Configs")
#     # st.selectbox(label="Reference Benchmarks", options=["LMSys Arena"])
#     options = st.multiselect(
#         "Reference Benchmarks",
#         ["LMSys Arena", "Open Compass", "Yellow", "Red", "Blue"],
#         ["LMSys Arena", "Open Compass"],
#     )
#     st.selectbox(label="# models compared", options=[20])
#     st.selectbox(label="Model Select Strategy", options=["Random"])
#     st.write("‎‎‎‎‎‎‎")
#     st.button("Upload a new benchmark")