File size: 3,219 Bytes
3d13fcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33e4196
3d13fcf
33e4196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d13fcf
 
 
 
 
 
 
 
 
 
 
33e4196
 
3d13fcf
33e4196
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import requests
from dataclasses import dataclass
import re
import streamlit as st

@dataclass
class GistInfo:
    """One leaderboard entry: a model, its result gist, and its scores.

    Field order is part of the interface because the generated
    ``__init__`` accepts the values positionally in this order.
    """

    # --- Where the results live ---
    gist_id: str      # presumably the GitHub gist identifier; confirm with caller
    filename: str     # presumably the file name inside the gist; confirm with caller
    url: str          # link target of the document icon in the leaderboard table

    # --- Which model this is ---
    model_name: str   # NOTE(review): relation to `model` is not visible here
    model_id: str     # appended to https://huggingface.co/ for the model link
    model: str

    # --- Scores, shown as columns of the leaderboard table ---
    agieval: float
    gpt4all: float
    truthfulqa: float
    bigbench: float
    average: float    # sort key of the leaderboard (highest first)

def update_gist(content, gist_id, access_token,
                filename="YALL - Yet Another LLM Leaderboard.md", timeout=10):
    """
    Update the content of one file in a GitHub Gist.

    Sends a PATCH request to the GitHub gists API and prints whether the
    update succeeded. Nothing is returned; an HTTP-level failure is
    reported on stdout rather than raised.

    Args:
    content (str): The new content of the gist file.
    gist_id (str): The ID of the gist to update.
    access_token (str): GitHub personal access token with gist permissions.
    filename (str): Name of the file inside the gist to overwrite.
        Defaults to the YALL leaderboard file.
    timeout (float): Seconds to wait for GitHub before giving up, so a
        stalled connection cannot block the caller forever.

    Raises:
    requests.RequestException: If the request cannot be completed at all
        (connection failure or timeout).
    """
    api_url = f"https://api.github.com/gists/{gist_id}"
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json"
    }
    data = {
        "files": {
            filename: {
                "content": content
            }
        }
    }

    response = requests.patch(api_url, json=data, headers=headers, timeout=timeout)

    if response.status_code == 200:
        print("Gist updated successfully.")
        return

    print("Failed to update gist. Status code:", response.status_code)
    # An error body is not guaranteed to be JSON (e.g. an HTML page from a
    # proxy); fall back to the raw text instead of raising while reporting.
    try:
        details = response.json()
    except ValueError:
        details = response.text
    print("Response:", details)

@st.cache_data
def create_yall():
    """Build the YALL leaderboard as a markdown table and push it to a gist.

    The entries are hard-coded placeholder models. They are ordered by
    average score, highest first, rendered as one table row each, and the
    finished table is handed to ``update_gist`` with placeholder
    credentials before being returned.

    Because the function is wrapped in ``st.cache_data``, Streamlit may
    serve a cached result instead of re-running the body (and with it the
    gist update).

    Returns:
    str: The markdown table, header and alignment rows included.
    """
    # Placeholder entries standing in for real evaluation results.
    entries = [
        GistInfo(
            gist_id="dummy_gist_id_1",
            filename="Model-1.md",
            url="https://gist.github.com/dummy_gist_id_1",
            model_name="Model 1",
            model_id="model-1",
            model="Model 1",
            agieval=95.4,
            gpt4all=88.7,
            truthfulqa=90.3,
            bigbench=85.6,
            average=90.0,
        ),
        GistInfo(
            gist_id="dummy_gist_id_2",
            filename="Model-2.md",
            url="https://gist.github.com/dummy_gist_id_2",
            model_name="Model 2",
            model_id="model-2",
            model="Model 2",
            agieval=89.1,
            gpt4all=85.0,
            truthfulqa=87.5,
            bigbench=83.0,
            average=86.2,
        ),
        GistInfo(
            gist_id="dummy_gist_id_3",
            filename="Model-3.md",
            url="https://gist.github.com/dummy_gist_id_3",
            model_name="Model 3",
            model_id="model-3",
            model="Model 3",
            agieval=78.2,
            gpt4all=81.4,
            truthfulqa=79.5,
            bigbench=77.0,
            average=79.0,
        ),
    ]

    # Best average on top; the sort is stable, so ties keep input order.
    entries.sort(key=lambda info: info.average, reverse=True)

    def render_row(info):
        # Model cell: Hugging Face link followed by a link to the result gist.
        hub_link = f"[{info.model_id}](https://huggingface.co/{info.model_id})"
        return f"| {hub_link} [📄]({info.url}) | {info.average} | {info.agieval} | {info.gpt4all} | {info.truthfulqa} | {info.bigbench} |\n"

    header_row = "| Model | Average | AGIEval | GPT4All | TruthfulQA | Bigbench |\n"
    # First column default-aligned, every score column right-aligned.
    alignment_row = "|---|---:|---:|---:|---:|---:|\n"
    body_rows = [render_row(info) for info in entries]
    markdown_table = "".join([header_row, alignment_row, *body_rows])

    # Publish to YALL's gist (placeholder gist ID and token).
    update_gist(
        content=markdown_table,
        gist_id="dummy_gist_id_yall",
        access_token="dummy_access_token",
    )

    return markdown_table