import { Benchmark } from "./types";

/**
 * Benchmark records for the Qwen3 model family.
 *
 * The list contains 24 entries: eight "Base Model" records followed by
 * sixteen "Thinking Mode" / "Non-thinking Mode" records. Every entry uses
 * provider "Qwen", zero input/output prices, and cites the same arXiv PDF
 * (2505.09388) as its source.
 *
 * Only a subset of the scores is active in each `benchmark` object:
 *   - base models:                 mmlu, gpqa, mbpp, mmmlu
 *   - thinking/non-thinking modes: gpqa_diamond, aime_24, aime_2025
 *
 * The remaining scores are kept as commented-out properties, one per line,
 * so each `//` disables only its own metric. NOTE(review): presumably they
 * are disabled because the `Benchmark` type in ./types has no matching
 * field yet; confirm the key names against that type before enabling any.
 */
export const qwenBenchmarks: Benchmark[] = [
  // ---------------------------------------------------------------------
  // Base models
  // ---------------------------------------------------------------------
  {
    model: "Qwen3-235B-A22B (Base Model)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      mmlu: 87.81,
      // mmluredux: 87.40,
      // "mmlu-pro": 68.18,
      // supergpqa: 44.06,
      // bbh: 88.87,
      gpqa: 47.47,
      // gsm8k: 94.39,
      // math: 71.84,
      // evalplus: 77.60,
      // multiple: 65.94,
      mbpp: 81.40,
      // cruxo: 79.00,
      // mgsm: 83.53,
      mmmlu: 86.70,
      // include: 73.46,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-32B (Base Model)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      mmlu: 83.61,
      // "mmlu-redux": 83.41,
      // "mmlu-pro": 65.54,
      // supergpqa: 39.78,
      // bbh: 87.38,
      gpqa: 49.49,
      // gsm8k: 93.40,
      // math: 61.62,
      // evalplus: 72.05,
      // "multipl-e": 67.06,
      mbpp: 78.20,
      // "crux-o": 72.50,
      // mgsm: 83.06,
      mmmlu: 83.83,
      // include: 67.87,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-14B (Base Model)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      mmlu: 81.05,
      // "mmlu-redux": 79.88,
      // "mmlu-pro": 61.03,
      // supergpqa: 34.27,
      // bbh: 81.07,
      gpqa: 39.90,
      // gsm8k: 92.49,
      // math: 62.02,
      // evalplus: 72.23,
      // "multipl-e": 61.69,
      mbpp: 73.40,
      // "crux-o": 68.60,
      // mgsm: 79.20,
      mmmlu: 81.46,
      // include: 64.55,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-30B-A3B (Base Model)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      mmlu: 81.38,
      // "mmlu-redux": 81.17,
      // "mmlu-pro": 61.49,
      // supergpqa: 35.72,
      // bbh: 81.54,
      gpqa: 43.94,
      // gsm8k: 91.81,
      // math: 59.04,
      // evalplus: 71.45,
      // "multipl-e": 66.53,
      mbpp: 74.40,
      // "crux-o": 67.20,
      // mgsm: 79.11,
      mmmlu: 81.46,
      // include: 67.00,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-8B (Base Model)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      mmlu: 76.89,
      // "mmlu-redux": 76.17,
      // "mmlu-pro": 56.73,
      // supergpqa: 31.64,
      // bbh: 78.40,
      gpqa: 44.44,
      // gsm8k: 89.84,
      // math: 60.80,
      // evalplus: 67.65,
      // "multipl-e": 58.75,
      mbpp: 69.80,
      // "crux-o": 62.00,
      // mgsm: 76.02,
      mmmlu: 75.72,
      // include: 59.40,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-4B (Base Model)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      mmlu: 72.99,
      // "mmlu-redux": 72.79,
      // "mmlu-pro": 50.58,
      // supergpqa: 28.43,
      // bbh: 72.59,
      gpqa: 36.87,
      // gsm8k: 87.79,
      // math: 54.10,
      // evalplus: 63.53,
      // "multipl-e": 53.13,
      mbpp: 67.00,
      // "crux-o": 55.00,
      // mgsm: 67.74,
      mmmlu: 71.42,
      // include: 56.29,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-1.7B (Base Model)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      mmlu: 62.63,
      // "mmlu-redux": 61.66,
      // "mmlu-pro": 36.76,
      // supergpqa: 20.92,
      // bbh: 54.47,
      gpqa: 28.28,
      // gsm8k: 75.44,
      // math: 43.50,
      // evalplus: 52.70,
      // "multipl-e": 42.71,
      mbpp: 55.40,
      // "crux-o": 36.40,
      // mgsm: 50.71,
      mmmlu: 63.27,
      // include: 45.57,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-0.6B (Base Model)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      mmlu: 52.81,
      // "mmlu-redux": 51.26,
      // "mmlu-pro": 24.74,
      // supergpqa: 15.03,
      // bbh: 41.47,
      gpqa: 26.77,
      // gsm8k: 59.59,
      // math: 32.44,
      // evalplus: 36.23,
      // "multipl-e": 24.58,
      mbpp: 36.60,
      // "crux-o": 27.00,
      // mgsm: 30.99,
      mmmlu: 50.16,
      // include: 34.26,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },

  // ---------------------------------------------------------------------
  // Thinking / non-thinking modes
  // ---------------------------------------------------------------------
  {
    model: "Qwen3-235B-A22B (Thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 92.7,
      gpqa_diamond: 71.1,
      // "c-eval": 89.6,
      // "livebench-2024-11-25": 77.1,
      // "ifeval-strict-prompt": 83.4,
      // "arena-hard": 95.6,
      // "alignbench-v1.1": 8.94,
      // "creative-writing-v3": 84.6,
      // writingbench: 8.03,
      // "math-500": 98.0,
      aime_24: 85.7,
      aime_2025: 81.5,
      // zebralogic: 80.3,
      // autologi: 89.0,
      // "bfcl-v3": 70.8,
      // "livecodebench-v5": 70.7,
      // "codeforces-rating": 2056,
      // "codeforces-percentile": 98.2,
      // "multi-if": 71.9,
      // include: 78.7,
      // "mmmlu-14-languages": 84.3,
      // "mt-aime2024": 80.8,
      // polymath: 54.7,
      // mlogiqa: 77.1,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-235B-A22B (Non-thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 89.2,
      gpqa_diamond: 62.9,
      // "c-eval": 86.1,
      // "livebench-2024-11-25": 62.5,
      // "ifeval-strict-prompt": 83.2,
      // "arena-hard": 96.1,
      // "alignbench-v1.1": 8.91,
      // "creative-writing-v3": 80.4,
      // writingbench: 7.70,
      // "math-500": 91.2,
      aime_24: 40.1,
      aime_2025: 24.7,
      // zebralogic: 37.7,
      // autologi: 83.3,
      // "bfcl-v3": 68.0,
      // "livecodebench-v5": 35.3,
      // "codeforces-rating": 1387,
      // "codeforces-percentile": 75.7,
      // "multi-if": 70.2,
      // include: 75.6,
      // "mmmlu-14-languages": 79.8,
      // "mt-aime2024": 32.4,
      // polymath: 27.0,
      // mlogiqa: 67.6,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-32B (Thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 90.9,
      gpqa_diamond: 68.4,
      // "c-eval": 87.3,
      // "livebench-2024-11-25": 74.9,
      // "ifeval-strict-prompt": 85.0,
      // "arena-hard": 93.8,
      // "alignbench-v1.1": 8.72,
      // "creative-writing-v3": 81.0,
      // writingbench: 7.90,
      // "math-500": 97.2,
      aime_24: 81.4,
      aime_2025: 72.9,
      // zebralogic: 88.8,
      // autologi: 87.3,
      // "bfcl-v3": 70.3,
      // "livecodebench-v5": 65.7,
      // "codeforces-rating": 1977,
      // "codeforces-percentile": 97.7,
      // "multi-if": 73.0,
      // include: 73.7,
      // "mmmlu-14-languages": 80.6,
      // "mt-aime2024": 75.0,
      // polymath: 47.4,
      // mlogiqa: 76.3,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-32B (Non-thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 85.7,
      gpqa_diamond: 54.6,
      // "c-eval": 83.3,
      // "livebench-2024-11-25": 59.8,
      // "ifeval-strict-prompt": 83.2,
      // "arena-hard": 92.8,
      // "alignbench-v1.1": 8.58,
      // "creative-writing-v3": 78.3,
      // writingbench: 7.54,
      // "math-500": 88.6,
      aime_24: 31.0,
      aime_2025: 20.2,
      // zebralogic: 29.2,
      // autologi: 78.5,
      // "bfcl-v3": 63.0,
      // "livecodebench-v5": 31.3,
      // "codeforces-rating": 1353,
      // "codeforces-percentile": 71.0,
      // "multi-if": 70.7,
      // include: 70.9,
      // "mmmlu-14-languages": 76.5,
      // "mt-aime2024": 24.1,
      // polymath: 22.5,
      // mlogiqa: 62.9,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-14B (Thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 88.6,
      gpqa_diamond: 64.0,
      // "c-eval": 86.2,
      // "livebench-2024-11-25": 71.3,
      // "ifeval-strict-prompt": 85.4,
      // "arena-hard": 91.7,
      // "alignbench-v1.1": 8.56,
      // "creative-writing-v3": 80.3,
      // writingbench: 7.80,
      // "math-500": 96.8,
      aime_24: 79.3,
      aime_2025: 70.4,
      // zebralogic: 88.5,
      // autologi: 89.2,
      // "bfcl-v3": 70.4,
      // "livecodebench-v5": 63.5,
      // "codeforces-rating": 1766,
      // "codeforces-percentile": 95.3,
      // "multi-if": 74.8,
      // include: 71.7,
      // "mmmlu-14-languages": 77.9,
      // "mt-aime2024": 73.3,
      // polymath: 45.8,
      // mlogiqa: 71.1,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-30B-A3B (Thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 89.5,
      gpqa_diamond: 65.8,
      // "c-eval": 86.6,
      // "livebench-2024-11-25": 74.3,
      // "ifeval-strict-prompt": 86.5,
      // "arena-hard": 91.0,
      // "alignbench-v1.1": 8.70,
      // "creative-writing-v3": 79.1,
      // writingbench: 7.70,
      // "math-500": 98.0,
      aime_24: 80.4,
      aime_2025: 70.9,
      // zebralogic: 89.5,
      // autologi: 88.7,
      // "bfcl-v3": 69.1,
      // "livecodebench-v5": 62.6,
      // "codeforces-rating": 1974,
      // "codeforces-percentile": 97.7,
      // "multi-if": 72.2,
      // include: 71.9,
      // "mmmlu-14-languages": 78.4,
      // "mt-aime2024": 73.9,
      // polymath: 46.1,
      // mlogiqa: 70.1,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-14B (Non-thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 82.0,
      gpqa_diamond: 54.8,
      // "c-eval": 81.0,
      // "livebench-2024-11-25": 59.6,
      // "ifeval-strict-prompt": 84.8,
      // "arena-hard": 86.3,
      // "alignbench-v1.1": 8.52,
      // "creative-writing-v3": 73.1,
      // writingbench: 7.24,
      // "math-500": 90.0,
      aime_24: 31.7,
      aime_2025: 23.3,
      // zebralogic: 33.0,
      // autologi: 82.0,
      // "bfcl-v3": 61.5,
      // "livecodebench-v5": 29.0,
      // "codeforces-rating": 1200,
      // "codeforces-percentile": 58.6,
      // "multi-if": 72.9,
      // include: 67.8,
      // "mmmlu-14-languages": 72.6,
      // "mt-aime2024": 23.2,
      // polymath: 22.0,
      // mlogiqa: 58.9,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-30B-A3B (Non-thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 84.1,
      gpqa_diamond: 54.8,
      // "c-eval": 82.9,
      // "livebench-2024-11-25": 59.4,
      // "ifeval-strict-prompt": 83.7,
      // "arena-hard": 88.0,
      // "alignbench-v1.1": 8.55,
      // "creative-writing-v3": 68.1,
      // writingbench: 7.22,
      // "math-500": 89.8,
      aime_24: 32.8,
      aime_2025: 21.6,
      // zebralogic: 33.2,
      // autologi: 81.5,
      // "bfcl-v3": 58.6,
      // "livecodebench-v5": 29.8,
      // "codeforces-rating": 1267,
      // "codeforces-percentile": 64.1,
      // "multi-if": 70.8,
      // include: 67.8,
      // "mmmlu-14-languages": 73.8,
      // "mt-aime2024": 24.6,
      // polymath: 23.3,
      // mlogiqa: 53.3,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-4B (Thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 83.7,
      gpqa_diamond: 55.9,
      // "c-eval": 77.5,
      // "livebench-2024-11-25": 63.6,
      // "ifeval-strict-prompt": 81.9,
      // "arena-hard": 76.6,
      // "alignbench-v1.1": 8.30,
      // "creative-writing-v3": 61.1,
      // writingbench: 7.35,
      // "math-500": 97.0,
      aime_24: 73.8,
      aime_2025: 65.6,
      // zebralogic: 81.0,
      // autologi: 87.9,
      // "bfcl-v3": 65.9,
      // "livecodebench-v5": 54.2,
      // "codeforces-rating": 1671,
      // "codeforces-percentile": 92.8,
      // "multi-if": 66.3,
      // include: 61.8,
      // "mmmlu-14-languages": 69.8,
      // "mt-aime2024": 60.7,
      // polymath: 40.0,
      // mlogiqa: 65.9,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-8B (Thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 87.5,
      gpqa_diamond: 62.0,
      // "c-eval": 83.4,
      // "livebench-2024-11-25": 67.1,
      // "ifeval-strict-prompt": 85.0,
      // "arena-hard": 85.8,
      // "alignbench-v1.1": 8.46,
      // "creative-writing-v3": 75.0,
      // writingbench: 7.59,
      // "math-500": 97.4,
      aime_24: 76.0,
      aime_2025: 67.3,
      // zebralogic: 84.8,
      // autologi: 89.1,
      // "bfcl-v3": 68.1,
      // "livecodebench-v5": 57.5,
      // "codeforces-rating": 1785,
      // "codeforces-percentile": 95.6,
      // "multi-if": 71.2,
      // include: 67.8,
      // "mmmlu-14-languages": 74.4,
      // "mt-aime2024": 65.4,
      // polymath: 42.7,
      // mlogiqa: 69.0,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-4B (Non-thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 77.3,
      gpqa_diamond: 41.7,
      // "c-eval": 72.2,
      // "livebench-2024-11-25": 48.4,
      // "ifeval-strict-prompt": 81.2,
      // "arena-hard": 66.2,
      // "alignbench-v1.1": 8.10,
      // "creative-writing-v3": 53.6,
      // writingbench: 6.85,
      // "math-500": 84.8,
      aime_24: 25.0,
      aime_2025: 19.1,
      // zebralogic: 35.2,
      // autologi: 76.3,
      // "bfcl-v3": 57.6,
      // "livecodebench-v5": 21.3,
      // "codeforces-rating": 842,
      // "codeforces-percentile": 33.7,
      // "multi-if": 61.3,
      // include: 53.8,
      // "mmmlu-14-languages": 61.7,
      // "mt-aime2024": 13.9,
      // polymath: 16.6,
      // mlogiqa: 49.9,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-8B (Non-thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 79.5,
      gpqa_diamond: 39.3,
      // "c-eval": 77.9,
      // "livebench-2024-11-25": 53.5,
      // "ifeval-strict-prompt": 83.0,
      // "arena-hard": 79.6,
      // "alignbench-v1.1": 8.38,
      // "creative-writing-v3": 64.5,
      // writingbench: 7.15,
      // "math-500": 87.4,
      aime_24: 29.1,
      aime_2025: 20.9,
      // zebralogic: 26.7,
      // autologi: 76.5,
      // "bfcl-v3": 60.2,
      // "livecodebench-v5": 22.8,
      // "codeforces-rating": 1110,
      // "codeforces-percentile": 52.4,
      // "multi-if": 69.2,
      // include: 62.5,
      // "mmmlu-14-languages": 66.9,
      // "mt-aime2024": 16.6,
      // polymath: 18.8,
      // mlogiqa: 51.4,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-0.6B (Thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 55.6,
      gpqa_diamond: 27.9,
      // "c-eval": 50.4,
      // "livebench-2024-11-25": 30.3,
      // "ifeval-strict-prompt": 59.2,
      // "arena-hard": 8.5,
      // "alignbench-v1.1": 6.10,
      // "creative-writing-v3": 30.6,
      // writingbench: 5.61,
      // "math-500": 77.6,
      aime_24: 10.7,
      aime_2025: 15.1,
      // zebralogic: 30.3,
      // autologi: 61.6,
      // "bfcl-v3": 46.4,
      // "livecodebench-v5": 12.3,
      // "multi-if": 36.1,
      // include: 35.9,
      // "mmmlu-14-languages": 43.1,
      // "mt-aime2024": 7.8,
      // polymath: 11.4,
      // mlogiqa: 40.9,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-1.7B (Thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 73.9,
      gpqa_diamond: 40.1,
      // "c-eval": 68.1,
      // "livebench-2024-11-25": 51.1,
      // "ifeval-strict-prompt": 72.5,
      // "arena-hard": 43.1,
      // "alignbench-v1.1": 7.60,
      // "creative-writing-v3": 48.0,
      // writingbench: 7.02,
      // "math-500": 93.4,
      aime_24: 48.3,
      aime_2025: 36.8,
      // zebralogic: 63.2,
      // autologi: 83.2,
      // "bfcl-v3": 56.6,
      // "livecodebench-v5": 33.2,
      // "multi-if": 51.2,
      // include: 51.8,
      // "mmmlu-14-languages": 59.1,
      // "mt-aime2024": 36.1,
      // polymath: 25.2,
      // mlogiqa: 56.0,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-0.6B (Non-thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 44.6,
      gpqa_diamond: 22.9,
      // "c-eval": 42.6,
      // "livebench-2024-11-25": 21.8,
      // "ifeval-strict-prompt": 54.5,
      // "arena-hard": 6.5,
      // "alignbench-v1.1": 5.60,
      // "creative-writing-v3": 28.4,
      // writingbench: 5.13,
      // "math-500": 55.2,
      aime_24: 3.4,
      aime_2025: 2.6,
      // zebralogic: 4.2,
      // autologi: 37.4,
      // "bfcl-v3": 44.1,
      // "livecodebench-v5": 3.6,
      // "multi-if": 33.3,
      // include: 34.4,
      // "mmmlu-14-languages": 37.1,
      // "mt-aime2024": 1.5,
      // polymath: 4.6,
      // mlogiqa: 37.3,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
  {
    model: "Qwen3-1.7B (Non-thinking Mode)",
    provider: "Qwen",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      // "mmlu-redux": 64.4,
      gpqa_diamond: 28.6,
      // "c-eval": 61.0,
      // "livebench-2024-11-25": 35.6,
      // "ifeval-strict-prompt": 68.2,
      // "arena-hard": 36.9,
      // "alignbench-v1.1": 7.20,
      // "creative-writing-v3": 43.6,
      // writingbench: 6.54,
      // "math-500": 73.0,
      aime_24: 13.4,
      aime_2025: 9.8,
      // zebralogic: 12.8,
      // autologi: 59.8,
      // "bfcl-v3": 52.2,
      // "livecodebench-v5": 11.6,
      // "multi-if": 44.7,
      // include: 42.6,
      // "mmmlu-14-languages": 48.3,
      // "mt-aime2024": 4.9,
      // polymath: 10.3,
      // mlogiqa: 41.1,
    },
    source: "https://arxiv.org/pdf/2505.09388",
  },
];