ping98k committed on
Commit
e4a181a
·
1 Parent(s): 9036d14

Update to run on Hugging Face Spaces

Browse files
Files changed (2) hide show
  1. main.py +12 -12
  2. tournament_utils.py +4 -4
main.py CHANGED
@@ -1,5 +1,5 @@
1
  from dotenv import load_dotenv
2
- load_dotenv("./local.env",override=True)
3
  import os, json, re, ast, gradio as gr
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
  from tqdm import tqdm
@@ -40,9 +40,9 @@ PAIRWISE_FILTER_DEFAULT = os.getenv("ENABLE_PAIRWISE_FILTER", "true").lower() ==
40
  GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
41
  SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
42
  PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
43
- GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "0.9"))
44
- SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.6"))
45
- PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.6"))
46
  SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
47
  PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
48
  GENERATE_THINKING_DEFAULT = os.getenv("ENABLE_GENERATE_THINKING", "false").lower() == "true"
@@ -107,9 +107,9 @@ def run_tournament(
107
  if pairwise_temperature is None:
108
  pairwise_temperature = PAIRWISE_TEMPERATURE_DEFAULT
109
  if not api_base:
110
- api_base = API_BASE_DEFAULT
111
  if not api_token:
112
- api_token = API_TOKEN_DEFAULT
113
  if not generate_model:
114
  generate_model = GENERATE_MODEL_DEFAULT
115
  if not score_model:
@@ -322,7 +322,7 @@ def run_tournament(
322
  demo = gr.Interface(
323
  fn=run_tournament,
324
  inputs=[
325
- gr.Textbox(value=API_BASE_DEFAULT, label="API Base Path"),
326
  gr.Textbox(value="", label="API Token", type="password"),
327
  gr.Textbox(value=GENERATE_MODEL_DEFAULT, label="Generation Model"),
328
  gr.Textbox(value=SCORE_MODEL_DEFAULT, label="Score Model"),
@@ -340,11 +340,11 @@ demo = gr.Interface(
340
  gr.Checkbox(value=PAIRWISE_FILTER_DEFAULT, label="Enable Pairwise Filter"),
341
  gr.Checkbox(value=SCORE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Score Model"),
342
  gr.Checkbox(value=PAIRWISE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Pairwise Model"),
343
- gr.Checkbox(value=GENERATE_THINKING_DEFAULT, label="Enable Thinking (Generate)"),
344
- gr.Checkbox(value=SCORE_THINKING_DEFAULT, label="Enable Thinking (Score)"),
345
- gr.Checkbox(value=PAIRWISE_THINKING_DEFAULT, label="Enable Thinking (Pairwise)"),
346
- gr.Checkbox(value=False, label="Enable Explain (Score)"),
347
- gr.Checkbox(value=False, label="Enable Explain (Pairwise)"),
348
  ],
349
  outputs=[
350
  gr.Textbox(lines=10, label="Process"),
 
1
  from dotenv import load_dotenv
2
+ # load_dotenv("./local.env",override=True)
3
  import os, json, re, ast, gradio as gr
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
  from tqdm import tqdm
 
40
  GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
41
  SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
42
  PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
43
+ GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "1.2"))
44
+ SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.1"))
45
+ PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.1"))
46
  SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
47
  PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
48
  GENERATE_THINKING_DEFAULT = os.getenv("ENABLE_GENERATE_THINKING", "false").lower() == "true"
 
107
  if pairwise_temperature is None:
108
  pairwise_temperature = PAIRWISE_TEMPERATURE_DEFAULT
109
  if not api_base:
110
+ api_base = ""
111
  if not api_token:
112
+ api_token = ""
113
  if not generate_model:
114
  generate_model = GENERATE_MODEL_DEFAULT
115
  if not score_model:
 
322
  demo = gr.Interface(
323
  fn=run_tournament,
324
  inputs=[
325
+ gr.Textbox(value=API_BASE_DEFAULT, label="API Base Path", info="Clone repos and run locally To change the API base path"),
326
  gr.Textbox(value="", label="API Token", type="password"),
327
  gr.Textbox(value=GENERATE_MODEL_DEFAULT, label="Generation Model"),
328
  gr.Textbox(value=SCORE_MODEL_DEFAULT, label="Score Model"),
 
340
  gr.Checkbox(value=PAIRWISE_FILTER_DEFAULT, label="Enable Pairwise Filter"),
341
  gr.Checkbox(value=SCORE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Score Model"),
342
  gr.Checkbox(value=PAIRWISE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Pairwise Model"),
343
+ gr.Checkbox(value=GENERATE_THINKING_DEFAULT, label="Enable Thinking (Generate)", info="Enable Qwen3 think mode"),
344
+ gr.Checkbox(value=SCORE_THINKING_DEFAULT, label="Enable Thinking (Score)" , info="Enable Qwen3 think mode"),
345
+ gr.Checkbox(value=PAIRWISE_THINKING_DEFAULT, label="Enable Thinking (Pairwise)" , info="Enable Qwen3 think mode"),
346
+ gr.Checkbox(value=True, label="Enable Explain (Score)", info="Prompt LLM to think step by step"),
347
+ gr.Checkbox(value=True, label="Enable Explain (Pairwise)", info="Prompt LLM to think step by step"),
348
  ],
349
  outputs=[
350
  gr.Textbox(lines=10, label="Process"),
tournament_utils.py CHANGED
@@ -35,7 +35,7 @@ def generate_players(
35
  """
36
  messages = [{"role": "user", "content": instruction}]
37
  kwargs = _completion_kwargs(api_base, api_key, temperature)
38
- kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
39
  response = completion(
40
  model=model,
41
  messages=messages,
@@ -64,7 +64,7 @@ def prompt_score(
64
  return_usage: bool = False,
65
  ) -> str | tuple[str, object]:
66
  """Return a plaintext score evaluation for `player`."""
67
- example_scores = ", ".join(["1-10"] * len(criteria_list)) or "1-10"
68
  prompt = f"""Evaluate the output below on the following criteria:
69
  {criteria_block}
70
 
@@ -84,7 +84,7 @@ def prompt_score(
84
 
85
  prompt += f"\n\nOutput:\n{player}"
86
  kwargs = _completion_kwargs(api_base, api_key, temperature)
87
- kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
88
  response = completion(
89
  model=model,
90
  messages=[{"role": "system", "content": prompt}],
@@ -135,7 +135,7 @@ def prompt_pairwise(
135
  prompt += f"\n\nInstruction:\n{instruction}"
136
  prompt += f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>"
137
  kwargs = _completion_kwargs(api_base, api_key, temperature)
138
- kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
139
  response = completion(
140
  model=model,
141
  messages=[{"role": "system", "content": prompt}],
 
35
  """
36
  messages = [{"role": "user", "content": instruction}]
37
  kwargs = _completion_kwargs(api_base, api_key, temperature)
38
+ # kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
39
  response = completion(
40
  model=model,
41
  messages=messages,
 
64
  return_usage: bool = False,
65
  ) -> str | tuple[str, object]:
66
  """Return a plaintext score evaluation for `player`."""
67
+ example_scores = ", ".join(["5"] * len(criteria_list)) or "5"
68
  prompt = f"""Evaluate the output below on the following criteria:
69
  {criteria_block}
70
 
 
84
 
85
  prompt += f"\n\nOutput:\n{player}"
86
  kwargs = _completion_kwargs(api_base, api_key, temperature)
87
+ # kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
88
  response = completion(
89
  model=model,
90
  messages=[{"role": "system", "content": prompt}],
 
135
  prompt += f"\n\nInstruction:\n{instruction}"
136
  prompt += f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>"
137
  kwargs = _completion_kwargs(api_base, api_key, temperature)
138
+ # kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
139
  response = completion(
140
  model=model,
141
  messages=[{"role": "system", "content": prompt}],