zhiminy committed on
Commit
0caf7c8
·
1 Parent(s): 61cd7b8

add new models

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. context_window.json +8 -8
app.py CHANGED
@@ -519,7 +519,7 @@ with gr.Blocks() as app:
519
  # ⚔️ Software Engineering (SE) Arena: Explore and Test the Best SE Chatbots with Long-Context Interactions
520
 
521
  ## 📜How It Works
522
- - **Blind Comparison**: Submit a SE-related query to two anonymous chatbots randomly selected from up to {len(available_models)} top models, including OpenAI, Gemini, Claude, Deepseek, Llama, Mistral, and others.
523
  - **Interactive Voting**: Engage in multi-turn dialogues with both chatbots and compare their responses. You can continue the conversation until you confidently choose the better model.
524
  - **Fair Play Rules**: Votes are counted only if chatbot identities remain anonymous. Revealing a chatbot's identity disqualifies the session.
525
 
 
519
  # ⚔️ Software Engineering (SE) Arena: Explore and Test the Best SE Chatbots with Long-Context Interactions
520
 
521
  ## 📜How It Works
522
+ - **Blind Comparison**: Submit a SE-related query to two anonymous chatbots randomly selected from up to {len(available_models)} top models, including OpenAI-o3, Gemini-2.0, Claude-3.5, Deepseek-r1, Llama-3.3, Qwen-2.5, and others.
523
  - **Interactive Voting**: Engage in multi-turn dialogues with both chatbots and compare their responses. You can continue the conversation until you confidently choose the better model.
524
  - **Fair Play Rules**: Votes are counted only if chatbot identities remain anonymous. Revealing a chatbot's identity disqualifies the session.
525
 
context_window.json CHANGED
@@ -1,17 +1,18 @@
1
  {
2
  "gpt-3.5-turbo": 16000,
3
- "gpt-3.5-turbo-16k": 16000,
4
- "gpt-4-32k": 32000,
5
  "gpt-4-turbo": 128000,
6
  "gpt-4o": 128000,
7
  "gpt-4o-mini": 128000,
8
- "claude-3-5-sonnet-latest" : 200000,
 
 
9
  "deepseek-chat": 64000,
10
  "deepseek-r1": 64000,
11
- "gemini-1.5-flash-latest": 1048576,
12
- "gemini-1.5-pro-latest": 2097152,
13
- "Hunyuan-A52B-Instruct": 128000,
14
- "llama-3-70b": 128000,
 
15
  "llama-3.1-405b": 128000,
16
  "llama-3.1-70b": 128000,
17
  "llama-3.3-70b": 128000,
@@ -23,6 +24,5 @@
23
  "Qwen2.5-72B-Instruct": 131072,
24
  "Qwen2.5-72B-Instruct-128k": 131072,
25
  "Qwen2.5-Coder-32B-Instruct": 131072,
26
- "QwQ-32B-Preview": 32768,
27
  "yi-large": 32000
28
  }
 
1
  {
2
  "gpt-3.5-turbo": 16000,
 
 
3
  "gpt-4-turbo": 128000,
4
  "gpt-4o": 128000,
5
  "gpt-4o-mini": 128000,
6
+ "claude-3-5-haiku-20241022" : 200000,
7
+ "claude-3-5-sonnet-20241022" : 200000,
8
+ "claude-3-opus-20240229" : 200000,
9
  "deepseek-chat": 64000,
10
  "deepseek-r1": 64000,
11
+ "gemini-1.5-flash": 1048576,
12
+ "gemini-1.5-pro": 2097152,
13
+ "gemini-2.0-flash-lite-preview": 1048576,
14
+ "gemini-2.0-pro-exp": 2097152,
15
+ "llama-3.1-8b": 128000,
16
  "llama-3.1-405b": 128000,
17
  "llama-3.1-70b": 128000,
18
  "llama-3.3-70b": 128000,
 
24
  "Qwen2.5-72B-Instruct": 131072,
25
  "Qwen2.5-72B-Instruct-128k": 131072,
26
  "Qwen2.5-Coder-32B-Instruct": 131072,
 
27
  "yi-large": 32000
28
  }