ping98k committed · Commit e4a181a · 1 parent: 9036d14

update to run on huggingface spaces

Files changed:
- main.py (+12, -12)
- tournament_utils.py (+4, -4)
main.py (CHANGED)
@@ -1,5 +1,5 @@
 from dotenv import load_dotenv
-load_dotenv("./local.env",override=True)
+# load_dotenv("./local.env",override=True)
 import os, json, re, ast, gradio as gr
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
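The local dotenv load is commented out because on Spaces configuration comes from Space secrets/variables rather than a checked-in local.env. A minimal sketch of a guarded alternative, assuming python-dotenv is installed; this is not what the commit does, just one way to keep both environments working from the same file:

import os
from dotenv import load_dotenv

# Only load the local override file when it actually exists (local dev);
# on Spaces the file is absent and env vars are injected by the platform.
if os.path.exists("./local.env"):
    load_dotenv("./local.env", override=True)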
@@ -40,9 +40,9 @@ PAIRWISE_FILTER_DEFAULT = os.getenv("ENABLE_PAIRWISE_FILTER", "true").lower() == "true"
 GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
 SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
 PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
-GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "…
-SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.…
-PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.…
+GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "1.2"))
+SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.1"))
+PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.1"))
 SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
 PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
 GENERATE_THINKING_DEFAULT = os.getenv("ENABLE_GENERATE_THINKING", "false").lower() == "true"

(The removed lines were truncated in the page extraction; `…` marks text that could not be recovered. The same applies to the `…` markers in the hunks below.)
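The new defaults are 1.2 for generation and 0.1 for scoring and pairwise judging. A quick, runnable check of the getenv-with-fallback pattern used above, with the same variable name as the code:

import os

# Unset: float() receives the fallback string "1.2".
os.environ.pop("GENERATE_TEMPERATURE", None)
print(float(os.getenv("GENERATE_TEMPERATURE", "1.2")))  # -> 1.2

# Set (e.g. as a Space variable): the override wins.
os.environ["GENERATE_TEMPERATURE"] = "0.7"
print(float(os.getenv("GENERATE_TEMPERATURE", "1.2")))  # -> 0.7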
@@ -107,9 +107,9 @@ def run_tournament(
     if pairwise_temperature is None:
         pairwise_temperature = PAIRWISE_TEMPERATURE_DEFAULT
     if not api_base:
-        api_base = …
+        api_base = ""
     if not api_token:
-        api_token = …
+        api_token = ""
     if not generate_model:
         generate_model = GENERATE_MODEL_DEFAULT
     if not score_model:
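Blanking these fallbacks means an empty form field stays an empty string instead of picking up a server-side default; whatever consumes `api_base` then has to decide what "" means. A common normalization, shown as a hypothetical helper (not from the repo), is to collapse empty strings to None so an OpenAI-compatible client falls back to its own default endpoint:

def normalize(value: str | None) -> str | None:
    """Collapse "" and None to None; keep real values unchanged."""
    return value or None

print(normalize(""))                            # -> None
print(normalize("https://api.example.com/v1"))  # -> 'https://api.example.com/v1'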
@@ -322,7 +322,7 @@ def run_tournament(
 demo = gr.Interface(
     fn=run_tournament,
     inputs=[
-        gr.Textbox(value=API_BASE_DEFAULT, label="API Base Path"),
+        gr.Textbox(value=API_BASE_DEFAULT, label="API Base Path", info="Clone repos and run locally To change the API base path"),
         gr.Textbox(value="", label="API Token", type="password"),
         gr.Textbox(value=GENERATE_MODEL_DEFAULT, label="Generation Model"),
         gr.Textbox(value=SCORE_MODEL_DEFAULT, label="Score Model"),
@@ -340,11 +340,11 @@ demo = gr.Interface(
         gr.Checkbox(value=PAIRWISE_FILTER_DEFAULT, label="Enable Pairwise Filter"),
         gr.Checkbox(value=SCORE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Score Model"),
         gr.Checkbox(value=PAIRWISE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Pairwise Model"),
-        gr.Checkbox(value=GENERATE_THINKING_DEFAULT, label="Enable Thinking (Generate)"),
-        gr.Checkbox(value=SCORE_THINKING_DEFAULT, label="Enable Thinking (Score)"),
-        gr.Checkbox(value=PAIRWISE_THINKING_DEFAULT, label="Enable Thinking (Pairwise)"),
-        gr.Checkbox(value=…
-        gr.Checkbox(value=…
+        gr.Checkbox(value=GENERATE_THINKING_DEFAULT, label="Enable Thinking (Generate)", info="Enable Qwen3 think mode"),
+        gr.Checkbox(value=SCORE_THINKING_DEFAULT, label="Enable Thinking (Score)", info="Enable Qwen3 think mode"),
+        gr.Checkbox(value=PAIRWISE_THINKING_DEFAULT, label="Enable Thinking (Pairwise)", info="Enable Qwen3 think mode"),
+        gr.Checkbox(value=True, label="Enable Explain (Score)", info="Prompt LLM to think step by step"),
+        gr.Checkbox(value=True, label="Enable Explain (Pairwise)", info="Prompt LLM to think step by step"),
     ],
     outputs=[
         gr.Textbox(lines=10, label="Process"),
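The `info=` keyword added throughout renders a short help text under each Gradio component. A self-contained sketch of the same pattern, with illustrative labels and a stub function rather than the Space's full input list:

import gradio as gr

def echo(api_base: str, thinking: bool) -> str:
    # Stand-in for run_tournament; just reflects the inputs back.
    return f"api_base={api_base!r}, thinking={thinking}"

demo = gr.Interface(
    fn=echo,
    inputs=[
        gr.Textbox(value="", label="API Base Path", info="OpenAI-compatible endpoint"),
        gr.Checkbox(value=False, label="Enable Thinking", info="Qwen3 think mode"),
    ],
    outputs=[gr.Textbox(label="Result")],
)

if __name__ == "__main__":
    demo.launch()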
tournament_utils.py (CHANGED)
@@ -35,7 +35,7 @@ def generate_players(
     """
     messages = [{"role": "user", "content": instruction}]
     kwargs = _completion_kwargs(api_base, api_key, temperature)
-    kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
+    # kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
     response = completion(
         model=model,
         messages=messages,
@@ -64,7 +64,7 @@ def prompt_score(
     return_usage: bool = False,
 ) -> str | tuple[str, object]:
     """Return a plaintext score evaluation for `player`."""
-    example_scores = ", ".join(["…
+    example_scores = ", ".join(["5"] * len(criteria_list)) or "5"
     prompt = f"""Evaluate the output below on the following criteria:
 {criteria_block}
 
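Worked example of the new `example_scores` line: it emits one "5" per criterion, and the `or "5"` guard keeps the prompt from ever showing an empty score list, since `", ".join([])` is the falsy empty string:

criteria_list = ["clarity", "accuracy", "brevity"]
print(", ".join(["5"] * len(criteria_list)) or "5")  # -> 5, 5, 5

criteria_list = []
print(", ".join(["5"] * len(criteria_list)) or "5")  # -> 5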
@@ -84,7 +84,7 @@ def prompt_score(
 
     prompt += f"\n\nOutput:\n{player}"
     kwargs = _completion_kwargs(api_base, api_key, temperature)
-    kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
+    # kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
     response = completion(
         model=model,
         messages=[{"role": "system", "content": prompt}],
@@ -135,7 +135,7 @@ def prompt_pairwise(
     prompt += f"\n\nInstruction:\n{instruction}"
     prompt += f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>"
     kwargs = _completion_kwargs(api_base, api_key, temperature)
-    kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
+    # kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
     response = completion(
         model=model,
         messages=[{"role": "system", "content": prompt}],
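All three `chat_template_kwargs` lines are disabled the same way, presumably because `chat_template_kwargs` is an extension of vLLM's OpenAI-compatible server (where Qwen3's `enable_thinking` switch lives) and hosted OpenAI-style endpoints reject it as an unknown parameter. A hypothetical way to keep the feature for self-hosted runs, sketched against a helper mirroring `_completion_kwargs`; the marker list is an assumption for illustration, not from the repo:

# Hypothetical guard (not in the commit): attach the vLLM/Qwen3 extension
# only when the endpoint looks self-hosted, so hosted APIs never see it.
SELF_HOSTED_MARKERS = ("localhost", "127.0.0.1", ":8000")  # illustrative

def completion_kwargs(api_base: str, api_key: str, temperature: float,
                      thinking: bool | None = None) -> dict:
    kwargs = {"api_base": api_base, "api_key": api_key, "temperature": temperature}
    if thinking is not None and any(m in api_base for m in SELF_HOSTED_MARKERS):
        kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
    return kwargs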