IlyasMoutawwakil HF staff committed on
Commit
a830adb
·
1 Parent(s): f8badc6
app.py CHANGED
@@ -18,6 +18,7 @@ from src.assets.text_content import (
18
  )
19
 
20
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
21
  LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
22
  MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB 🖥️"}
23
  ALL_COLUMNS_MAPPING = {
@@ -208,6 +209,8 @@ def filter_query(
208
  # Demo interface
209
  demo = gr.Blocks(css=custom_css)
210
  with demo:
 
 
211
  # leaderboard title
212
  gr.HTML(TITLE)
213
  # introduction text
 
18
  )
19
 
20
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
21
+ LOGO_URL = "https://huggingface.co/spaces/optimum/llm-perf-leaderboard/resolve/main/huggy_bench.png"
22
  LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
23
  MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB 🖥️"}
24
  ALL_COLUMNS_MAPPING = {
 
209
  # Demo interface
210
  demo = gr.Blocks(css=custom_css)
211
  with demo:
212
+ # logo
213
+ gr.HTML(f'<img src="{LOGO_URL}">', elem_classes="logo")
214
  # leaderboard title
215
  gr.HTML(TITLE)
216
  # introduction text
src/assets/css_html_js.py CHANGED
@@ -1,4 +1,11 @@
1
  custom_css = """
 
 
 
 
 
 
 
2
  .descriptive-text {
3
  font-size: 16px !important;
4
  }
 
1
  custom_css = """
2
+ .logo {
3
+ width: 300px;
4
+ height: auto;
5
+ margin: 0 auto;
6
+ max-width: 100%
7
+ object-fit: contain;
8
+ }
9
  .descriptive-text {
10
  font-size: 16px !important;
11
  }
src/assets/text_content.py CHANGED
@@ -13,9 +13,8 @@ ABOUT_TEXT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
13
  <li>To avoid communication-dependent results, only one GPU is used.</li>
14
  <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
15
  <li>LLMs are running on a singleton batch with a prompt size of 256 and generating a 1000 tokens.</li>
16
- <li>Peak memory is measured in MB during the generate pass using Py3NVML while assuring the GPU's isolation.</li>
17
  <li>Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.</li>
18
- <li>Each pair of (Model Type, Weight Class) is represented by the best scored model. This LLM is the one used for all the hardware/backend/optimization experiments.</li>
19
  </ul>
20
  """
21
 
 
13
  <li>To avoid communication-dependent results, only one GPU is used.</li>
14
  <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
15
  <li>LLMs are running on a singleton batch with a prompt size of 256 and generating a 1000 tokens.</li>
 
16
  <li>Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.</li>
17
+ <li>We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two being reported by PyTorch and the last one being observed using PyNVML.</li>
18
  </ul>
19
  """
20