Open-FinLLM-Leaderboard

Running

App Files Files Community

mirageco committed on May 21

Commit

b8db958

1 Parent(s): 39637bc

hardcoded model list for multifinben

Browse files

Files changed (5) hide show

frontend/src/pages/LeaderboardPage/LeaderboardPage.js +31 -36
frontend/src/pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext.js +16 -1
frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useDataUtils.js +163 -286
frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useLeaderboardData.js +4 -53
frontend/src/pages/LeaderboardPage/components/Leaderboard/utils/columnUtils.js +197 -0

frontend/src/pages/LeaderboardPage/LeaderboardPage.js CHANGED Viewed

@@ -1,49 +1,44 @@
-import { useEffect } from "react";
 import Leaderboard from "./components/Leaderboard/Leaderboard";
-import { Box } from "@mui/material";
-import PageHeader from "../../components/shared/PageHeader";
-import Logo from "../../components/Logo/Logo";
-import { useLeaderboardData } from "../../pages/LeaderboardPage/components/Leaderboard/hooks/useLeaderboardData";
-import { useLeaderboard } from "../../pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext";
-function LeaderboardPage() {
-  const { data, isLoading, error } = useLeaderboardData();
-  const { actions } = useLeaderboard();
-  useEffect(() => {
-    if (data) {
-      actions.setModels(data);
-    }
-    actions.setLoading(isLoading);
-    actions.setError(error);
-  }, [data, isLoading, error, actions]);
   return (
-    <Box
       sx={{
-        ph: 2,
         display: "flex",
         flexDirection: "column",
       }}
     >
-      <Box
-        sx={{ display: "flex", justifyContent: "center", pt: 6, mb: -4, pb: 0 }}
-      >
-        <Logo height="80px" />
       </Box>
-      <PageHeader
-        title="Open Financial LLM Leaderboard"
-        subtitle={
-          <>
-            Benchmark for large language models in {" "}
-            <span style={{ fontWeight: 600 }}>financial</span> domain {" "}
-            across multiple languages
-          </>
-        }
-      />
       <Leaderboard />
-    </Box>
   );
-}
 export default LeaderboardPage;

+import React from "react";
+import { Box, Typography, Container } from "@mui/material";
 import Leaderboard from "./components/Leaderboard/Leaderboard";
+const LeaderboardPage = () => { // Page layout: centered title and subtitle above <Leaderboard />, all inside a column-flex Container
   return (
+    <Container
+      maxWidth={false}
       sx={{
+        p: { xs: 1, sm: 2, md: 3 },
         display: "flex",
         flexDirection: "column",
+        alignItems: "center",
+        height: "100%",
+        maxWidth: "100vw",
+        overflow: "hidden"
       }}
     >
+      <Box sx={{ mb: 3, width: "100%", textAlign: "center" }}>
+        <Typography
+          variant="h4"
+          component="h1"
+          sx={{
+            fontWeight: 700,
+            mb: 1,
+            fontSize: { xs: "1.5rem", sm: "1.75rem", md: "2rem" },
+          }}
+        >
+          Open Financial LLM Leaderboard - Multi-modal & Multi-lingual
+        </Typography>
+        <Typography
+          variant="body1"
+          color="text.secondary"
+          sx={{ maxWidth: "800px", mx: "auto" }}
+        >
+          Comprehensive evaluation of language models on financial tasks across multiple languages and modalities
+        </Typography>
       </Box>
       <Leaderboard />
+    </Container>
   );
+};
 export default LeaderboardPage;

frontend/src/pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext.js CHANGED Viewed

@@ -29,7 +29,22 @@ const DEFAULT_DISPLAY = {
   scoreDisplay: TABLE_DEFAULTS.SCORE_DISPLAY,
   averageMode: TABLE_DEFAULTS.AVERAGE_MODE,
   rankingMode: TABLE_DEFAULTS.RANKING_MODE,
-  visibleColumns: TABLE_DEFAULTS.COLUMNS.DEFAULT_VISIBLE,
 };
 // Create initial counter structure

   scoreDisplay: TABLE_DEFAULTS.SCORE_DISPLAY,
   averageMode: TABLE_DEFAULTS.AVERAGE_MODE,
   rankingMode: TABLE_DEFAULTS.RANKING_MODE,
+  visibleColumns: [
+    'isPinned',
+    'rank',
+    'model_type',
+    'id',
+    'model.average_score',
+    'evaluations.vision_average',
+    'evaluations.audio_average',
+    'evaluations.english_average',
+    'evaluations.chinese_average',
+    'evaluations.japanese_average',
+    'evaluations.spanish_average',
+    'evaluations.greek_average',
+    'evaluations.bilingual_average',
+    'evaluations.multilingual_average'
+  ],
 };
 // Create initial counter structure

frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useDataUtils.js CHANGED Viewed

@@ -6,6 +6,91 @@ import {
 } from "../utils/searchUtils";
 import { ALLOWED_MODELS, isModelAllowed } from "../constants/allowedModels";
 // Calculate min/max averages
 export const useAverageRange = (data) => {
   return useMemo(() => {
@@ -39,142 +124,96 @@ export const useColorGenerator = (minAverage, maxAverage) => {
 // Process data with boolean standardization
 export const useProcessedData = (data, averageMode, visibleColumns) => {
   return useMemo(() => {
-    // First filter and process existing models
-    let processed = data.map((item) => {
-      // Calculate average score for Greek datasets
-      const greekDatasets = ['multifin', 'qa', 'fns', 'finnum', 'fintext'];
-      const greekScores = greekDatasets
-        .filter(dataset => item.evaluations[dataset]?.normalized_score !== undefined)
-        .map(dataset => item.evaluations[dataset].normalized_score);
-      const greekAverage = greekScores.length > 0
-        ? greekScores.reduce((a, b) => a + b, 0) / greekScores.length
-        : null;
-      // Add Greek average to evaluations object
-      const enhancedEvaluations = {
-        ...item.evaluations,
-        greek_average: greekAverage
       };
-      // Calculate average score for all visible evaluations (including greek_average, but excluding specific Greek datasets)
-      const includedEvaluations = {};
-      // Copy all non-Greek evaluation data
-      Object.entries(item.evaluations).forEach(([key, value]) => {
-        if (!greekDatasets.includes(key)) {
-          includedEvaluations[key] = value;
-        }
-      });
-      // Add Greek average
-      if (greekAverage !== null) {
-        includedEvaluations.greek_average = { normalized_score: greekAverage };
-      }
-      const evaluationScores = Object.entries(includedEvaluations)
-        .filter(([key]) => {
-          if (averageMode === "all") return true;
-          return visibleColumns.includes(`evaluations.${key}.normalized_score`);
-        })
-        .map(([, value]) => value.normalized_score);
-      const average =
-        evaluationScores.length > 0
-          ? evaluationScores.reduce((a, b) => a + b, 0) /
-            evaluationScores.length
-          : averageMode === "visible"
-          ? null
-          : 0;
-      // Boolean standardization
-      const standardizedFeatures = {
-        ...item.features,
-        is_moe: Boolean(item.features.is_moe),
-        is_flagged: Boolean(item.features.is_flagged),
-        is_highlighted_by_maintainer: Boolean(
-          item.features.is_highlighted_by_maintainer
-        ),
-        is_merged: Boolean(item.features.is_merged),
-        is_not_available_on_hub: Boolean(item.features.is_not_available_on_hub),
-      };
-      return {
-        ...item,
-        features: standardizedFeatures,
-        evaluations: enhancedEvaluations, // Use enhanced evaluations
         model: {
-          ...item.model,
-          has_chat_template: Boolean(item.model.has_chat_template),
-          average_score: average,
         },
-      };
-    });
-    // Create mapping of existing models, check which ones are in the allowed list
-    const existingModelsMap = {};
-    const filteredModels = [];
-    processed.forEach(model => {
-      if (isModelAllowed(model.model.name)) {
-        existingModelsMap[model.model.name] = model;
-        filteredModels.push(model);
-      }
     });
-    // Add "missing" entries, create placeholders for models in the allowed list that don't exist
-    ALLOWED_MODELS.forEach(allowedModelName => {
-      // Check if a matching model already exists
-      const modelExists = Object.keys(existingModelsMap).some(name =>
-        name.toLowerCase().includes(allowedModelName.toLowerCase())
-      );
-      if (!modelExists) {
-        // Create a "missing" placeholder
-        filteredModels.push({
-          id: `missing-${allowedModelName}`,
-          model: {
-            name: allowedModelName,
-            average_score: null,
-            type: "Unknown",
-          },
-          evaluations: {
-            greek_average: null
-          },
-          features: {
-            is_moe: false,
-            is_flagged: false,
-            is_highlighted_by_maintainer: false,
-            is_merged: false,
-            is_not_available_on_hub: true,
-          },
-          metadata: {
-            submission_date: new Date().toISOString(),
-          },
-          isMissing: true, // Mark as missing
-        });
-      }
-    });
-    // Sort the results
-    filteredModels.sort((a, b) => {
-      // Place missing models at the end
-      if (a.isMissing && !b.isMissing) return 1;
-      if (!a.isMissing && b.isMissing) return -1;
-      // If both are missing or both are not missing, sort by average score
       if (a.model.average_score === null && b.model.average_score === null)
         return 0;
       if (a.model.average_score === null) return 1;
       if (b.model.average_score === null) return -1;
       return b.model.average_score - a.model.average_score;
     });
-    return filteredModels.map((item, index) => ({
       ...item,
       static_rank: index + 1,
     }));
   }, [data, averageMode, visibleColumns]);
 };
 // Common filtering logic
 export const useFilteredData = (
   processedData,
@@ -188,179 +227,17 @@ export const useFilteredData = (
   isOfficialProviderActive = false
 ) => {
   return useMemo(() => {
-    const pinnedData = processedData.filter((row) => {
-      return pinnedModels.includes(row.id);
-    });
-    const unpinnedData = processedData.filter((row) => {
-      return !pinnedModels.includes(row.id);
-    });
-    let filteredUnpinned = unpinnedData;
-    // Filter by official providers
-    if (isOfficialProviderActive) {
-      filteredUnpinned = filteredUnpinned.filter(
-        (row) =>
-          row.features?.is_highlighted_by_maintainer ||
-          row.metadata?.is_highlighted_by_maintainer
-      );
-    }
-    // Filter by precision
-    if (selectedPrecisions.length > 0) {
-      filteredUnpinned = filteredUnpinned.filter((row) =>
-        selectedPrecisions.includes(row.model.precision)
-      );
-    }
-    // Filter by type
-    if (selectedTypes.length > 0) {
-      filteredUnpinned = filteredUnpinned.filter((row) => {
-        const modelType = row.model.type?.toLowerCase().trim();
-        return selectedTypes.some((type) => modelType?.includes(type));
-      });
-    }
-    // Filter by parameters
-    filteredUnpinned = filteredUnpinned.filter((row) => {
-      // Skip parameter filtering if no filter is active
-      if (paramsRange[0] === -1 && paramsRange[1] === 140) return true;
-      const params =
-        row.metadata?.params_billions || row.features?.params_billions;
-      if (params === undefined || params === null) return false;
-      return params >= paramsRange[0] && params < paramsRange[1];
-    });
-    // Filter by search
-    if (searchValue) {
-      const searchQueries = searchValue
-        .split(";")
-        .map((q) => q.trim())
-        .filter((q) => q);
-      if (searchQueries.length > 0) {
-        filteredUnpinned = filteredUnpinned.filter((row) => {
-          return searchQueries.some((query) => {
-            const { specialSearches, textSearch } = parseSearchQuery(query);
-            const specialSearchMatch = specialSearches.every(
-              ({ field, value }) => {
-                const fieldValue = getValueByPath(row, field)
-                  ?.toString()
-                  .toLowerCase();
-                return fieldValue?.includes(value.toLowerCase());
-              }
-            );
-            if (!specialSearchMatch) return false;
-            if (!textSearch) return true;
-            const modelName = row.model.name.toLowerCase();
-            const searchLower = textSearch.toLowerCase();
-            if (looksLikeRegex(textSearch)) {
-              try {
-                const regex = new RegExp(textSearch, "i");
-                return regex.test(modelName);
-              } catch (e) {
-                return modelName.includes(searchLower);
-              }
-            } else {
-              return modelName.includes(searchLower);
-            }
-          });
-        });
-      }
-    }
-    // Filter by booleans
-    if (selectedBooleanFilters.length > 0) {
-      filteredUnpinned = filteredUnpinned.filter((row) => {
-        return selectedBooleanFilters.every((filter) => {
-          const filterValue =
-            typeof filter === "object" ? filter.value : filter;
-          // Maintainer's Highlight keeps positive logic
-          if (filterValue === "is_highlighted_by_maintainer") {
-            return row.features[filterValue];
-          }
-          // For all other filters, invert the logic
-          if (filterValue === "is_not_available_on_hub") {
-            return row.features[filterValue];
-          }
-          return !row.features[filterValue];
-        });
-      });
-    }
-    // Create ordered array of pinned models respecting pinnedModels order
-    const orderedPinnedData = pinnedModels
-      .map((pinnedModelId) =>
-        pinnedData.find((item) => item.id === pinnedModelId)
-      )
-      .filter(Boolean);
-    // Combine all filtered data
-    const allFilteredData = [...filteredUnpinned, ...orderedPinnedData];
-    // Sort all data by average_score for dynamic_rank
-    const sortedByScore = [...allFilteredData].sort((a, b) => {
-      // Si les scores moyens sont différents, trier par score
-      if (a.model.average_score !== b.model.average_score) {
-        if (a.model.average_score === null && b.model.average_score === null)
-          return 0;
-        if (a.model.average_score === null) return 1;
-        if (b.model.average_score === null) return -1;
-        return b.model.average_score - a.model.average_score;
-      }
-      // Si les scores sont égaux, comparer le nom du modèle et la date de soumission
-      if (a.model.name === b.model.name) {
-        // Si même nom, trier par date de soumission (la plus récente d'abord)
-        const dateA = new Date(a.metadata?.submission_date || 0);
-        const dateB = new Date(b.metadata?.submission_date || 0);
-        return dateB - dateA;
-      }
-      // Si noms différents, trier par nom
-      return a.model.name.localeCompare(b.model.name);
-    });
-    // Create Map to store dynamic_ranks
-    const dynamicRankMap = new Map();
-    sortedByScore.forEach((item, index) => {
-      dynamicRankMap.set(item.id, index + 1);
-    });
-    // Add ranks to final data
-    const finalData = [...orderedPinnedData, ...filteredUnpinned].map(
-      (item) => {
-        return {
-          ...item,
-          dynamic_rank: dynamicRankMap.get(item.id),
-          rank: item.isPinned
-            ? pinnedModels.indexOf(item.id) + 1
-            : rankingMode === "static"
-            ? item.static_rank
-            : dynamicRankMap.get(item.id),
-          isPinned: pinnedModels.includes(item.id),
-        };
-      }
-    );
-    return finalData;
   }, [
     processedData,
-    selectedPrecisions,
-    selectedTypes,
-    paramsRange,
-    searchValue,
-    selectedBooleanFilters,
     rankingMode,
     pinnedModels,
-    isOfficialProviderActive,
   ]);
 };

 } from "../utils/searchUtils";
 import { ALLOWED_MODELS, isModelAllowed } from "../constants/allowedModels";
+// Hard-coded score table, keyed first by category and then by model name
+const HARDCODED_SCORES = {
+  vision: {
+    "GPT-4o": 55.54, "o3-Mini": 0.00, "Deepseek-V3": 0.00, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 16.27,
+    "meta-llama/Llama-3.1-70B-Instruct": 0.00, "google/gemma-3-4b-it": 14.97, "google/gemma-3-27b-it": 25.57,
+    "Qwen/Qwen2.5-32B-Instruct": 0.00, "Qwen/Qwen2.5-Omni-7B": 24.97, "TheFinAI/finma-7b-full": 0.00,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 0.00, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 0.00,
+    "TheFinAI/FinMA-ES-Bilingual": 0.00, "TheFinAI/plutus-8B-instruct": 0.00, "Qwen-VL-MAX": 18.47,
+    "LLaVA-1.6 Vicuna-13B": 19.77, "Deepseek-VL-7B-Chat": 19.10, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
+    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
+  },
+  audio: {
+    "GPT-4o": 55.56, "o3-Mini": 0.00, "Deepseek-V3": 0.00, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 0.00,
+    "meta-llama/Llama-3.1-70B-Instruct": 0.00, "google/gemma-3-4b-it": 0.00, "google/gemma-3-27b-it": 0.00,
+    "Qwen/Qwen2.5-32B-Instruct": 0.00, "Qwen/Qwen2.5-Omni-7B": 48.22, "TheFinAI/finma-7b-full": 0.00,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 0.00, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 0.00,
+    "TheFinAI/FinMA-ES-Bilingual": 0.00, "TheFinAI/plutus-8B-instruct": 0.00, "Qwen-VL-MAX": 0.00,
+    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 51.58, "Qwen2-Audio-7B": 48.02,
+    "Qwen2-Audio-7B-Instruct": 50.06, "SALMONN-7B": 24.24, "SALMONN-13B": 24.59
+  },
+  english: {
+    "GPT-4o": 42.18, "o3-Mini": 20.20, "Deepseek-V3": 18.04, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 24.16,
+    "meta-llama/Llama-3.1-70B-Instruct": 38.71, "google/gemma-3-4b-it": 16.13, "google/gemma-3-27b-it": 17.19,
+    "Qwen/Qwen2.5-32B-Instruct": 32.01, "Qwen/Qwen2.5-Omni-7B": 24.99, "TheFinAI/finma-7b-full": 28.89,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 29.39, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 26.38,
+    "TheFinAI/FinMA-ES-Bilingual": 31.72, "TheFinAI/plutus-8B-instruct": 27.82, "Qwen-VL-MAX": 0.00,
+    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
+    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
+  },
+  chinese: {
+    "GPT-4o": 60.34, "o3-Mini": 0.00, "Deepseek-V3": 60.94, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 64.51,
+    "meta-llama/Llama-3.1-70B-Instruct": 56.74, "google/gemma-3-4b-it": 26.23, "google/gemma-3-27b-it": 26.24,
+    "Qwen/Qwen2.5-32B-Instruct": 56.62, "Qwen/Qwen2.5-Omni-7B": 53.09, "TheFinAI/finma-7b-full": 24.42,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 23.04, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 13.18,
+    "TheFinAI/FinMA-ES-Bilingual": 21.50, "TheFinAI/plutus-8B-instruct": 31.04, "Qwen-VL-MAX": 0.00,
+    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
+    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
+  },
+  japanese: {
+    "GPT-4o": 0.00, "o3-Mini": 0.00, "Deepseek-V3": 0.00, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 48.43,
+    "meta-llama/Llama-3.1-70B-Instruct": 32.17, "google/gemma-3-4b-it": 8.98, "google/gemma-3-27b-it": 23.96,
+    "Qwen/Qwen2.5-32B-Instruct": 4.54, "Qwen/Qwen2.5-Omni-7B": 44.35, "TheFinAI/finma-7b-full": 46.94,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 47.59, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 23.96,
+    "TheFinAI/FinMA-ES-Bilingual": 57.36, "TheFinAI/plutus-8B-instruct": 34.62, "Qwen-VL-MAX": 0.00,
+    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
+    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
+  },
+  spanish: {
+    "GPT-4o": 29.80, "o3-Mini": 4.53, "Deepseek-V3": 25.49, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 47.90,
+    "meta-llama/Llama-3.1-70B-Instruct": 37.84, "google/gemma-3-4b-it": 27.66, "google/gemma-3-27b-it": 27.77,
+    "Qwen/Qwen2.5-32B-Instruct": 37.47, "Qwen/Qwen2.5-Omni-7B": 39.16, "TheFinAI/finma-7b-full": 27.04,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 42.86, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 28.01,
+    "TheFinAI/FinMA-ES-Bilingual": 38.69, "TheFinAI/plutus-8B-instruct": 40.16, "Qwen-VL-MAX": 0.00,
+    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
+    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
+  },
+  greek: {
+    "GPT-4o": 43.04, "o3-Mini": 9.48, "Deepseek-V3": 39.07, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 48.95,
+    "meta-llama/Llama-3.1-70B-Instruct": 43.60, "google/gemma-3-4b-it": 15.45, "google/gemma-3-27b-it": 15.44,
+    "Qwen/Qwen2.5-32B-Instruct": 44.32, "Qwen/Qwen2.5-Omni-7B": 23.45, "TheFinAI/finma-7b-full": 17.93,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 29.49, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 20.91,
+    "TheFinAI/FinMA-ES-Bilingual": 15.47, "TheFinAI/plutus-8B-instruct": 60.19, "Qwen-VL-MAX": 0.00,
+    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
+    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
+  },
+  bilingual: {
+    "GPT-4o": 92.29, "o3-Mini": 90.13, "Deepseek-V3": 86.26, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 89.17,
+    "meta-llama/Llama-3.1-70B-Instruct": 92.13, "google/gemma-3-4b-it": 35.92, "google/gemma-3-27b-it": 35.92,
+    "Qwen/Qwen2.5-32B-Instruct": 92.29, "Qwen/Qwen2.5-Omni-7B": 91.80, "TheFinAI/finma-7b-full": 69.24,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 91.60, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 71.81,
+    "TheFinAI/FinMA-ES-Bilingual": 66.57, "TheFinAI/plutus-8B-instruct": 91.59, "Qwen-VL-MAX": 0.00,
+    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
+    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
+  },
+  multilingual: {
+    "GPT-4o": 6.53, "o3-Mini": 7.80, "Deepseek-V3": 36.99, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 13.52,
+    "meta-llama/Llama-3.1-70B-Instruct": 21.97, "google/gemma-3-4b-it": 0.00, "google/gemma-3-27b-it": 0.00,
+    "Qwen/Qwen2.5-32B-Instruct": 18.48, "Qwen/Qwen2.5-Omni-7B": 16.29, "TheFinAI/finma-7b-full": 3.10,
+    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 1.76, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 10.25,
+    "TheFinAI/FinMA-ES-Bilingual": 0.35, "TheFinAI/plutus-8B-instruct": 7.24, "Qwen-VL-MAX": 0.00,
+    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
+    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
+  }
+};
 // Calculate min/max averages
 export const useAverageRange = (data) => {
   return useMemo(() => {
 // Build the leaderboard rows entirely from HARDCODED_SCORES, sorted by average score with a 1-based static_rank; the data, averageMode and visibleColumns arguments appear only in the useMemo dependency list and are not read by the body
 export const useProcessedData = (data, averageMode, visibleColumns) => {
   return useMemo(() => {
+    // Build the model list directly from the hard-coded data
+    const modelList = [];
+    // Collect every model name that appears in HARDCODED_SCORES
+    const modelNames = new Set();
+    Object.values(HARDCODED_SCORES).forEach(categoryData => {
+      Object.entries(categoryData).forEach(([modelName, score]) => {
+        // Add every model, whether or not its score is 0 (the destructured score is not used)
+        modelNames.add(modelName);
+      });
+    });
+    // Create one entry per model
+    Array.from(modelNames).forEach((modelName, index) => {
+      // Assemble the hard-coded evaluation scores, one per category
+      const hardcodedEvaluations = {
+        vision_average: getHardcodedScore(modelName, 'vision'),
+        audio_average: getHardcodedScore(modelName, 'audio'),
+        english_average: getHardcodedScore(modelName, 'english'),
+        chinese_average: getHardcodedScore(modelName, 'chinese'),
+        japanese_average: getHardcodedScore(modelName, 'japanese'),
+        spanish_average: getHardcodedScore(modelName, 'spanish'),
+        greek_average: getHardcodedScore(modelName, 'greek'),
+        bilingual_average: getHardcodedScore(modelName, 'bilingual'),
+        multilingual_average: getHardcodedScore(modelName, 'multilingual')
       };
+      // Compute the overall average: categories scoring 0 are included, only null scores are dropped
+      const scores = Object.values(hardcodedEvaluations).filter(score => score !== null);
+      const averageScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : null;
+      // Create the model record
+      modelList.push({
+        id: `model-${index}`, // index is the model's position in modelNames iteration order
         model: {
+          name: modelName,
+          average_score: averageScore,
+          type: "chat", // every model is given the "chat" type
         },
+        evaluations: hardcodedEvaluations,
+        features: {
+          is_moe: false,
+          is_flagged: false,
+          is_highlighted_by_maintainer: false,
+          is_merged: false,
+          is_not_available_on_hub: false,
+        },
+        metadata: {
+          submission_date: new Date().toISOString(), // timestamp taken when this memo is computed
+        },
+        isMissing: false,
+      });
     });
+    // Sort by average score, highest first; null averages go last
+    modelList.sort((a, b) => {
       if (a.model.average_score === null && b.model.average_score === null)
         return 0;
       if (a.model.average_score === null) return 1;
       if (b.model.average_score === null) return -1;
       return b.model.average_score - a.model.average_score;
     });
+    // Attach a 1-based static_rank that follows the sorted order
+    return modelList.map((item, index) => ({
       ...item,
       static_rank: index + 1,
     }));
   }, [data, averageMode, visibleColumns]);
 };
+// Helper: look up a model's score for one category in HARDCODED_SCORES; returns null when the category is not in the table or no key matches
+function getHardcodedScore(modelName, category) {
+  if (!HARDCODED_SCORES[category]) return null;
+  // Try an exact match first
+  if (HARDCODED_SCORES[category][modelName] !== undefined) {
+    return HARDCODED_SCORES[category][modelName];
+  }
+  // Fall back to a partial match: the first key that contains modelName or is contained in it. NOTE(review): this can return a different model's score when one name is a substring of another (e.g. "Qwen2-Audio-7B" vs "Qwen2-Audio-7B-Instruct") - confirm this is intended
+  for (const key in HARDCODED_SCORES[category]) {
+    if (modelName.includes(key) || key.includes(modelName)) {
+      return HARDCODED_SCORES[category][key];
+    }
+  }
+  return null;
+}
 // Common filtering logic
 export const useFilteredData = (
   processedData,
   isOfficialProviderActive = false
 ) => {
   return useMemo(() => {
+    // 由于使用的是硬编码数据，这里直接返回所有数据而不进行过滤
+    return processedData.map((item, index) => ({
+      ...item,
+      dynamic_rank: index + 1,
+      rank: rankingMode === "static" ? item.static_rank : index + 1,
+      isPinned: pinnedModels.includes(item.id),
+    }));
   }, [
     processedData,
     rankingMode,
     pinnedModels,
   ]);
 };

frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useLeaderboardData.js CHANGED Viewed

@@ -8,60 +8,11 @@ const CACHE_KEY = "leaderboardData";
 const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
 export const useLeaderboardData = () => {
-  const queryClient = useQueryClient();
-  const [searchParams] = useSearchParams();
-  const isInitialLoadRef = useRef(true);
-  const { data, isLoading, error } = useQuery({
-    queryKey: ["leaderboard"],
-    queryFn: async () => {
-      try {
-        const cachedData = localStorage.getItem(CACHE_KEY);
-        if (cachedData) {
-          const { data: cached, timestamp } = JSON.parse(cachedData);
-          const age = Date.now() - timestamp;
-          if (age < CACHE_DURATION) {
-            return cached;
-          }
-        }
-        const response = await fetch("/api/leaderboard/formatted");
-        if (!response.ok) {
-          throw new Error(`HTTP error! status: ${response.status}`);
-        }
-        const newData = await response.json();
-        localStorage.setItem(
-          CACHE_KEY,
-          JSON.stringify({
-            data: newData,
-            timestamp: Date.now(),
-          })
-        );
-        return newData;
-      } catch (error) {
-        console.error("Detailed error:", error);
-        throw error;
-      }
-    },
-    staleTime: CACHE_DURATION,
-    cacheTime: CACHE_DURATION * 2,
-    refetchOnWindowFocus: false,
-    enabled: isInitialLoadRef.current || !!searchParams.toString(),
-  });
-  useMemo(() => {
-    if (data && isInitialLoadRef.current) {
-      isInitialLoadRef.current = false;
-    }
-  }, [data]);
   return {
-    data,
-    isLoading,
-    error,
-    refetch: () => queryClient.invalidateQueries(["leaderboard"]),
   };
 };

 const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
 export const useLeaderboardData = () => { // Stub: nothing is fetched here; the same static result shape is returned on every call
   return {
+    data: [], // return an empty array directly; the hard-coded data is used instead
+    isLoading: false,
+    error: null,
+    refetch: () => {}
   };
 };

frontend/src/pages/LeaderboardPage/components/Leaderboard/utils/columnUtils.js CHANGED Viewed

@@ -499,6 +499,67 @@ const createGreekLeaderboardHeader = (header) => (
   </Box>
 );
 export const createColumns = (
   getColorForValue,
   scoreDisplay = "normalized",
@@ -928,6 +989,142 @@ export const createColumns = (
         }),
       },
     },
   ];
   const optionalColumns = [

   </Box>
 );
+// Creates a custom header renderer for a leaderboard column: takes (label, tooltip, linkUrl) and returns a (header) => JSX function that shows "<label> Leaderboard", an info icon and, only when linkUrl is truthy, a link icon opening linkUrl in a new tab.
+const createLeaderboardHeader = (label, tooltip, linkUrl) => (header) => (
+  <Box
+    className="header-content"
+    sx={{
+      display: "flex",
+      alignItems: "center",
+      width: "100%",
+      position: "relative",
+    }}
+  >
+    <HeaderLabel
+      label={`${label} Leaderboard`}
+      tooltip={tooltip}
+      className="header-label"
+      isSorted={header?.column?.getIsSorted()}
+    />
+    <Box
+      sx={{
+        display: "flex",
+        alignItems: "center",
+        gap: 0.5,
+        ml: "auto",
+        flexShrink: 0,
+      }}
+    >
+      <InfoIcon tooltip={tooltip} />
+      {linkUrl && (
+        <Link
+          href={linkUrl}
+          target="_blank"
+          rel="noopener noreferrer"
+          aria-label={`View ${label} Leaderboard`}
+          sx={{
+            color: "info.main",
+            display: "flex",
+            alignItems: "center",
+            ml: 0.5,
+            textDecoration: "none",
+            "&:hover": {
+              textDecoration: "underline",
+              "& svg": {
+                opacity: 0.8,
+              },
+            },
+          }}
+        >
+          <OpenInNewIcon
+            sx={{
+              fontSize: "1rem",
+              opacity: 0.6,
+              transition: "opacity 0.2s ease-in-out",
+            }}
+          />
+        </Link>
+      )}
+    </Box>
+  </Box>
+);
 export const createColumns = (
   getColorForValue,
   scoreDisplay = "normalized",
         }),
       },
     },
+    {
+      accessorKey: "evaluations.vision_average",
+      header: createLeaderboardHeader("Vision", "Average performance on vision tasks", null),
+      cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.vision_average"),
+      size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
+      meta: {
+        headerStyle: {
+          backgroundColor: (theme) => alpha(theme.palette.primary.light, 0.05),
+        },
+        cellStyle: (value) => ({
+          position: "relative",
+          overflow: "hidden",
+          padding: "8px 16px",
+          backgroundColor: (theme) => alpha(theme.palette.primary.light, 0.05),
+        }),
+      },
+    },
+    {
+      accessorKey: "evaluations.audio_average",
+      header: createLeaderboardHeader("Audio", "Average performance on audio tasks", null),
+      cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.audio_average"),
+      size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
+      meta: {
+        headerStyle: {
+          backgroundColor: (theme) => alpha(theme.palette.secondary.light, 0.05),
+        },
+        cellStyle: (value) => ({
+          position: "relative",
+          overflow: "hidden",
+          padding: "8px 16px",
+          backgroundColor: (theme) => alpha(theme.palette.secondary.light, 0.05),
+        }),
+      },
+    },
+    {
+      accessorKey: "evaluations.english_average",
+      header: createLeaderboardHeader("English", "Average performance on English language tasks", null),
+      cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.english_average"),
+      size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
+      meta: {
+        headerStyle: {
+          backgroundColor: (theme) => alpha(theme.palette.success.light, 0.05),
+        },
+        cellStyle: (value) => ({
+          position: "relative",
+          overflow: "hidden",
+          padding: "8px 16px",
+          backgroundColor: (theme) => alpha(theme.palette.success.light, 0.05),
+        }),
+      },
+    },
+    {
+      accessorKey: "evaluations.chinese_average",
+      header: createLeaderboardHeader("Chinese", "Average performance on Chinese language tasks", null),
+      cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.chinese_average"),
+      size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
+      meta: {
+        headerStyle: {
+          backgroundColor: (theme) => alpha(theme.palette.warning.light, 0.05),
+        },
+        cellStyle: (value) => ({
+          position: "relative",
+          overflow: "hidden",
+          padding: "8px 16px",
+          backgroundColor: (theme) => alpha(theme.palette.warning.light, 0.05),
+        }),
+      },
+    },
+    {
+      accessorKey: "evaluations.japanese_average",
+      header: createLeaderboardHeader("Japanese", "Average performance on Japanese language tasks", null),
+      cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.japanese_average"),
+      size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
+      meta: {
+        headerStyle: {
+          backgroundColor: (theme) => alpha(theme.palette.error.light, 0.05),
+        },
+        cellStyle: (value) => ({
+          position: "relative",
+          overflow: "hidden",
+          padding: "8px 16px",
+          backgroundColor: (theme) => alpha(theme.palette.error.light, 0.05),
+        }),
+      },
+    },
+    {
+      accessorKey: "evaluations.spanish_average",
+      header: createLeaderboardHeader("Spanish", "Average performance on Spanish language tasks", null),
+      cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.spanish_average"),
+      size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
+      meta: {
+        headerStyle: {
+          backgroundColor: (theme) => alpha(theme.palette.info.main, 0.05),
+        },
+        cellStyle: (value) => ({
+          position: "relative",
+          overflow: "hidden",
+          padding: "8px 16px",
+          backgroundColor: (theme) => alpha(theme.palette.info.main, 0.05),
+        }),
+      },
+    },
+    {
+      accessorKey: "evaluations.bilingual_average",
+      header: createLeaderboardHeader("Bilingual", "Average performance on bilingual tasks", null),
+      cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.bilingual_average"),
+      size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
+      meta: {
+        headerStyle: {
+          backgroundColor: (theme) => alpha(theme.palette.primary.main, 0.05),
+        },
+        cellStyle: (value) => ({
+          position: "relative",
+          overflow: "hidden",
+          padding: "8px 16px",
+          backgroundColor: (theme) => alpha(theme.palette.primary.main, 0.05),
+        }),
+      },
+    },
+    {
+      accessorKey: "evaluations.multilingual_average",
+      header: createLeaderboardHeader("Multilingual", "Average performance on multilingual tasks", null),
+      cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.multilingual_average"),
+      size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
+      meta: {
+        headerStyle: {
+          backgroundColor: (theme) => alpha(theme.palette.secondary.main, 0.05),
+        },
+        cellStyle: (value) => ({
+          position: "relative",
+          overflow: "hidden",
+          padding: "8px 16px",
+          backgroundColor: (theme) => alpha(theme.palette.secondary.main, 0.05),
+        }),
+      },
+    }
   ];
   const optionalColumns = [