Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaya
commited on
Commit
·
25580aa
1
Parent(s):
79fbee7
Update process status and config
Browse files- yourbench_space/app.py +12 -2
- yourbench_space/config.py +3 -4
- yourbench_space/utils.py +56 -26
yourbench_space/app.py
CHANGED
|
@@ -60,6 +60,16 @@ def generate_and_return(hf_org, hf_prefix):
|
|
| 60 |
)
|
| 61 |
)
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
def prepare_task(oauth_token: gr.OAuthToken | None, hf_dataset_prefix: str, _=None):
|
| 65 |
new_env = os.environ.copy()
|
|
@@ -121,7 +131,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
| 121 |
file_input = gr.File(
|
| 122 |
label="Upload text files",
|
| 123 |
file_count="multiple",
|
| 124 |
-
file_types=[".txt", ".md", ".html"],
|
| 125 |
)
|
| 126 |
output = gr.Textbox(label="Log")
|
| 127 |
file_input.upload(
|
|
@@ -170,7 +180,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
| 170 |
with gr.Row():
|
| 171 |
process_status = gr.Checkbox(label="Process Status", interactive=False)
|
| 172 |
status_timer = gr.Timer(1.0, active=True)
|
| 173 |
-
status_timer.tick(
|
| 174 |
|
| 175 |
with gr.Row():
|
| 176 |
start_button = gr.Button("Start Task")
|
|
|
|
| 60 |
)
|
| 61 |
)
|
| 62 |
|
| 63 |
+
def update_process_status():
|
| 64 |
+
"""Update process status and include exit details if process has terminated"""
|
| 65 |
+
is_running = manager.is_running()
|
| 66 |
+
|
| 67 |
+
if not is_running:
|
| 68 |
+
exit_code, exit_reason = manager.get_exit_details()
|
| 69 |
+
status_text = f"Process Status: Stopped - {exit_reason}, exit code - {exit_code}" if exit_reason else "Process Status: Stopped"
|
| 70 |
+
return gr.update(value=False, label=status_text)
|
| 71 |
+
|
| 72 |
+
return gr.update(value=True, label="Process Status: Running")
|
| 73 |
|
| 74 |
def prepare_task(oauth_token: gr.OAuthToken | None, hf_dataset_prefix: str, _=None):
|
| 75 |
new_env = os.environ.copy()
|
|
|
|
| 131 |
file_input = gr.File(
|
| 132 |
label="Upload text files",
|
| 133 |
file_count="multiple",
|
| 134 |
+
file_types=[".txt", ".md", ".html", ".pdf"],
|
| 135 |
)
|
| 136 |
output = gr.Textbox(label="Log")
|
| 137 |
file_input.upload(
|
|
|
|
| 180 |
with gr.Row():
|
| 181 |
process_status = gr.Checkbox(label="Process Status", interactive=False)
|
| 182 |
status_timer = gr.Timer(1.0, active=True)
|
| 183 |
+
status_timer.tick(update_process_status, outputs=process_status)
|
| 184 |
|
| 185 |
with gr.Row():
|
| 186 |
start_button = gr.Button("Start Task")
|
yourbench_space/config.py
CHANGED
|
@@ -12,16 +12,15 @@ def generate_base_config(hf_org, hf_prefix):
|
|
| 12 |
"hf_organization": hf_org,
|
| 13 |
"hf_dataset_name": hf_prefix,
|
| 14 |
},
|
| 15 |
-
"local_dataset_dir": "results/",
|
| 16 |
"model_list": [
|
| 17 |
{
|
| 18 |
"model_name": "meta-llama/Llama-3.3-70B-Instruct",
|
| 19 |
-
"provider": "
|
| 20 |
"max_concurrent_requests": 32,
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"model_name": "Qwen/Qwen2.5-72B-Instruct",
|
| 24 |
-
"provider": "
|
| 25 |
"max_concurrent_requests": 32,
|
| 26 |
}
|
| 27 |
],
|
|
@@ -76,7 +75,7 @@ def generate_base_config(hf_org, hf_prefix):
|
|
| 76 |
],
|
| 77 |
},
|
| 78 |
"judge_answers": {
|
| 79 |
-
"run":
|
| 80 |
"comparing_strategies": [["zeroshot", "gold"]],
|
| 81 |
"chunk_column_index": 0,
|
| 82 |
"random_seed": 42,
|
|
|
|
| 12 |
"hf_organization": hf_org,
|
| 13 |
"hf_dataset_name": hf_prefix,
|
| 14 |
},
|
|
|
|
| 15 |
"model_list": [
|
| 16 |
{
|
| 17 |
"model_name": "meta-llama/Llama-3.3-70B-Instruct",
|
| 18 |
+
"provider": "novita",
|
| 19 |
"max_concurrent_requests": 32,
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"model_name": "Qwen/Qwen2.5-72B-Instruct",
|
| 23 |
+
"provider": "novita",
|
| 24 |
"max_concurrent_requests": 32,
|
| 25 |
}
|
| 26 |
],
|
|
|
|
| 75 |
],
|
| 76 |
},
|
| 77 |
"judge_answers": {
|
| 78 |
+
"run": False, # to change when fixed
|
| 79 |
"comparing_strategies": [["zeroshot", "gold"]],
|
| 80 |
"chunk_column_index": 0,
|
| 81 |
"random_seed": 42,
|
yourbench_space/utils.py
CHANGED
|
@@ -19,9 +19,8 @@ STAGES = [
|
|
| 19 |
"summarization",
|
| 20 |
"chunking",
|
| 21 |
"single_shot_question_generation",
|
| 22 |
-
"multi_hop_question_generation",
|
| 23 |
"answer_generation",
|
| 24 |
-
"judge_answers",
|
| 25 |
]
|
| 26 |
|
| 27 |
|
|
@@ -56,6 +55,7 @@ class SubprocessManager:
|
|
| 56 |
self.command = command
|
| 57 |
self.process = None
|
| 58 |
self.output_stream = io.StringIO()
|
|
|
|
| 59 |
|
| 60 |
def start_process(self, custom_env: dict | None):
|
| 61 |
"""Start the subprocess."""
|
|
@@ -64,20 +64,30 @@ class SubprocessManager:
|
|
| 64 |
return
|
| 65 |
|
| 66 |
self.output_stream = io.StringIO()
|
| 67 |
-
self.
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
def read_and_get_output(self):
|
| 80 |
"""Read subprocess output, capture it, and return log and completed stages."""
|
|
|
|
|
|
|
|
|
|
| 81 |
if self.process and self.process.stdout:
|
| 82 |
try:
|
| 83 |
while True:
|
|
@@ -89,10 +99,9 @@ class SubprocessManager:
|
|
| 89 |
except BlockingIOError:
|
| 90 |
pass
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
)
|
| 96 |
return current_output, completed_stages
|
| 97 |
|
| 98 |
def stop_process(self):
|
|
@@ -101,10 +110,13 @@ class SubprocessManager:
|
|
| 101 |
logger.info("Process is not running")
|
| 102 |
return
|
| 103 |
logger.info("Sending SIGTERM to the Process")
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
def kill_process(self):
|
| 110 |
"""Forcefully kill the subprocess"""
|
|
@@ -112,11 +124,29 @@ class SubprocessManager:
|
|
| 112 |
logger.info("Process is not running")
|
| 113 |
return
|
| 114 |
logger.info("Sending SIGKILL to the Process")
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
| 119 |
|
| 120 |
def is_running(self):
|
| 121 |
"""Check if the subprocess is still running"""
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
"summarization",
|
| 20 |
"chunking",
|
| 21 |
"single_shot_question_generation",
|
|
|
|
| 22 |
"answer_generation",
|
| 23 |
+
# "judge_answers", # to uncomment when fixed
|
| 24 |
]
|
| 25 |
|
| 26 |
|
|
|
|
| 55 |
self.command = command
|
| 56 |
self.process = None
|
| 57 |
self.output_stream = io.StringIO()
|
| 58 |
+
self.exit_code = None
|
| 59 |
|
| 60 |
def start_process(self, custom_env: dict | None):
|
| 61 |
"""Start the subprocess."""
|
|
|
|
| 64 |
return
|
| 65 |
|
| 66 |
self.output_stream = io.StringIO()
|
| 67 |
+
self.exit_code = None
|
| 68 |
+
|
| 69 |
+
try:
|
| 70 |
+
logger.info(f"Starting process with command: {' '.join(self.command)}")
|
| 71 |
+
self.process = subprocess.Popen(
|
| 72 |
+
self.command,
|
| 73 |
+
stdout=subprocess.PIPE,
|
| 74 |
+
stderr=subprocess.STDOUT, # Combine stderr with stdout
|
| 75 |
+
text=True,
|
| 76 |
+
bufsize=1,
|
| 77 |
+
start_new_session=True,
|
| 78 |
+
env=custom_env,
|
| 79 |
+
)
|
| 80 |
+
os.set_blocking(self.process.stdout.fileno(), False)
|
| 81 |
+
logger.info(f"Started process with PID: {self.process.pid}")
|
| 82 |
+
except Exception as e:
|
| 83 |
+
logger.error(f"Failed to start process: {str(e)}")
|
| 84 |
+
return
|
| 85 |
|
| 86 |
def read_and_get_output(self):
|
| 87 |
"""Read subprocess output, capture it, and return log and completed stages."""
|
| 88 |
+
current_output = ""
|
| 89 |
+
completed_stages = []
|
| 90 |
+
|
| 91 |
if self.process and self.process.stdout:
|
| 92 |
try:
|
| 93 |
while True:
|
|
|
|
| 99 |
except BlockingIOError:
|
| 100 |
pass
|
| 101 |
|
| 102 |
+
current_output = self.output_stream.getvalue()
|
| 103 |
+
completed_stages = list(set(re.findall(r"Successfully completed stage: (\w+)", current_output)))
|
| 104 |
+
|
|
|
|
| 105 |
return current_output, completed_stages
|
| 106 |
|
| 107 |
def stop_process(self):
|
|
|
|
| 110 |
logger.info("Process is not running")
|
| 111 |
return
|
| 112 |
logger.info("Sending SIGTERM to the Process")
|
| 113 |
+
try:
|
| 114 |
+
self.process.terminate()
|
| 115 |
+
self.exit_code = self.process.wait(timeout=5) # Wait up to 5 seconds for process to terminate
|
| 116 |
+
logger.info(f"Process terminated by user with exit code {self.exit_code}")
|
| 117 |
+
except subprocess.TimeoutExpired:
|
| 118 |
+
logger.warning("Process did not terminate within timeout, sending SIGKILL")
|
| 119 |
+
self.kill_process()
|
| 120 |
|
| 121 |
def kill_process(self):
|
| 122 |
"""Forcefully kill the subprocess"""
|
|
|
|
| 124 |
logger.info("Process is not running")
|
| 125 |
return
|
| 126 |
logger.info("Sending SIGKILL to the Process")
|
| 127 |
+
try:
|
| 128 |
+
self.process.kill()
|
| 129 |
+
self.exit_code = self.process.wait(timeout=5) # Wait up to 5 seconds for process to be killed
|
| 130 |
+
logger.info(f"Process killed by user with exit code {self.exit_code}")
|
| 131 |
+
except subprocess.TimeoutExpired:
|
| 132 |
+
logger.error("Process could not be killed within timeout")
|
| 133 |
|
| 134 |
def is_running(self):
|
| 135 |
"""Check if the subprocess is still running"""
|
| 136 |
+
if self.process is None:
|
| 137 |
+
return False
|
| 138 |
+
|
| 139 |
+
return self.process.poll() is None
|
| 140 |
+
|
| 141 |
+
def get_exit_details(self):
|
| 142 |
+
"""Return exit code and reason if process has terminated"""
|
| 143 |
+
if self.process is None:
|
| 144 |
+
return None, "Process was never started"
|
| 145 |
+
|
| 146 |
+
if self.is_running():
|
| 147 |
+
return None, "Process is still running"
|
| 148 |
+
|
| 149 |
+
if not self.exit_code is None and self.exit_code != 0 :
|
| 150 |
+
return self.exit_code, "Process exited abnormaly"
|
| 151 |
+
|
| 152 |
+
return self.exit_code, "Process exited normaly"
|