joffrey Thomas
Leaderboard
33e4196 verified
raw
history blame
3.22 kB
import os
import requests
from dataclasses import dataclass
import re
import streamlit as st
@dataclass
class GistInfo:
    """One leaderboard entry: a model, its benchmark scores, and its gist.

    Field order is part of the interface (the generated ``__init__`` accepts
    the values positionally in this order), so it must not be rearranged.
    """

    # --- Where the result gist lives ---
    gist_id: str
    filename: str
    url: str  # rendered as the document-icon link in the leaderboard table

    # --- Which model the scores belong to ---
    model_name: str
    model_id: str  # used as link text and as the huggingface.co URL path
    model: str

    # --- Benchmark scores ---
    agieval: float
    gpt4all: float
    truthfulqa: float
    bigbench: float
    average: float  # stored as supplied (not computed here); the sort key
def update_gist(
    content,
    gist_id,
    access_token,
    filename="YALL - Yet Another LLM Leaderboard.md",
    timeout=30,
):
    """
    Update the content of one file in a GitHub Gist.

    Sends a PATCH request to the GitHub gists API and prints the outcome.
    HTTP-level failures (non-200 status) are reported on stdout rather than
    raised; connection errors and timeouts raised by ``requests`` propagate
    to the caller.

    Args:
        content (str): The new content of the gist file.
        gist_id (str): The ID of the gist to update.
        access_token (str): GitHub personal access token with gist permissions.
        filename (str): Name of the file inside the gist to overwrite.
            Defaults to the YALL leaderboard file.
        timeout (float | tuple | None): Timeout in seconds handed to
            ``requests.patch``. Defaults to 30 so a stalled connection
            cannot block the caller indefinitely.

    Returns:
        bool: True if GitHub answered with status 200, False otherwise.
    """
    api_url = f"https://api.github.com/gists/{gist_id}"
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    data = {"files": {filename: {"content": content}}}

    response = requests.patch(api_url, json=data, headers=headers, timeout=timeout)
    if response.status_code == 200:
        print("Gist updated successfully.")
        return True

    print("Failed to update gist. Status code:", response.status_code)
    # Error bodies are not guaranteed to be JSON (e.g. an HTML page from a
    # proxy); fall back to the raw text instead of raising while reporting.
    try:
        details = response.json()
    except ValueError:
        details = response.text
    print("Response:", details)
    return False
@st.cache_data
def create_yall():
    """Build the leaderboard markdown table and push it to the YALL gist.

    Uses three hard-coded placeholder entries, ranks them by ``average``
    (highest first), renders one markdown table row per entry, sends the
    table to ``update_gist`` with placeholder gist ID and token, and returns
    the table text.

    NOTE(review): the function is wrapped in ``st.cache_data``; presumably
    repeated calls reuse the cached table and skip the gist update — confirm
    against the Streamlit caching documentation.

    Returns:
        str: The rendered markdown table, including header and alignment rows.
    """
    # Dummy data
    entries = [
        GistInfo(
            gist_id="dummy_gist_id_1",
            filename="Model-1.md",
            url="https://gist.github.com/dummy_gist_id_1",
            model_name="Model 1",
            model_id="model-1",
            model="Model 1",
            agieval=95.4,
            gpt4all=88.7,
            truthfulqa=90.3,
            bigbench=85.6,
            average=90.0,
        ),
        GistInfo(
            gist_id="dummy_gist_id_2",
            filename="Model-2.md",
            url="https://gist.github.com/dummy_gist_id_2",
            model_name="Model 2",
            model_id="model-2",
            model="Model 2",
            agieval=89.1,
            gpt4all=85.0,
            truthfulqa=87.5,
            bigbench=83.0,
            average=86.2,
        ),
        GistInfo(
            gist_id="dummy_gist_id_3",
            filename="Model-3.md",
            url="https://gist.github.com/dummy_gist_id_3",
            model_name="Model 3",
            model_id="model-3",
            model="Model 3",
            agieval=78.2,
            gpt4all=81.4,
            truthfulqa=79.5,
            bigbench=77.0,
            average=79.0,
        ),
    ]

    # Rank best-first; list.sort is stable, so ties keep their listed order.
    entries.sort(key=lambda entry: entry.average, reverse=True)

    # Collect the table line by line, then join once at the end.
    table_lines = [
        "| Model | Average | AGIEval | GPT4All | TruthfulQA | Bigbench |\n",
        "|---|---:|---:|---:|---:|---:|\n",
    ]
    for gist in entries:
        model_link = f"[{gist.model_id}](https://huggingface.co/{gist.model_id})"
        table_lines.append(
            f"| {model_link} [📄]({gist.url}) | {gist.average} | {gist.agieval} | {gist.gpt4all} | {gist.truthfulqa} | {gist.bigbench} |\n"
        )
    markdown_table = "".join(table_lines)

    # Update YALL's gist with dummy gist ID and token
    update_gist(
        content=markdown_table,
        gist_id="dummy_gist_id_yall",
        access_token="dummy_access_token",
    )
    return markdown_table