# advanced / app.py
# Author: Alina Lozovskaya
# Commit: "Add kill task" (3119795) — raw / history / blame, 9.03 kB
import os
import sys
import pathlib
import shutil
import threading
import multiprocessing
import io
import yaml
import gradio as gr
from loguru import logger
from yourbench.pipeline import run_pipeline
# Directory where user-uploaded documents are staged for the pipeline's ingestion step.
UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)
# Path the generated YAML pipeline configuration is saved to (and read from by the run command).
CONFIG_PATH = pathlib.Path("/app/yourbench_config.yml")
# Replace loguru's default handler with a single INFO-level stderr sink.
logger.remove()
logger.add(sys.stderr, level="INFO")
import subprocess
import io
import os
import time
class SubprocessManager:
    """Manage one long-running subprocess: start, poll output, stop, kill.

    Combined stdout+stderr output is accumulated in an in-memory StringIO
    buffer; every poll returns the full output captured so far.
    """

    def __init__(self, command):
        # command: argv list handed to subprocess.Popen (shell=False).
        self.command = command
        self.process = None
        self.output_stream = io.StringIO()

    def start_process(self):
        """Start the subprocess if it is not already running."""
        if self.is_running():
            logger.info("Process is already running")
            return
        self.process = subprocess.Popen(
            self.command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # Combine stderr with stdout
            text=True,
            bufsize=1,  # Line-buffered
            start_new_session=True,  # New session so signals target only the child
        )
        # Non-blocking reads so read_and_get_output() never stalls the caller
        # (it is polled from a UI timer).
        os.set_blocking(self.process.stdout.fileno(), False)
        logger.info("Started the process")

    def read_and_get_output(self):
        """Drain any available subprocess output and return everything captured so far."""
        if self.process and self.process.stdout:
            try:
                while True:
                    line = self.process.stdout.readline()
                    if line:
                        self.output_stream.write(line)  # Capture in StringIO
                    else:
                        break
            except BlockingIOError:
                # No data available right now; fall through and return what we have.
                pass
        return self.output_stream.getvalue()

    def stop_process(self):
        """Terminate the subprocess gracefully (SIGTERM) and wait for it to exit."""
        if not self.is_running():
            # Bug fix: this path previously logged "Started the process".
            logger.info("Process is not running")
            return
        logger.info("Sending SIGTERM to the Process")
        self.process.terminate()
        exit_code = self.process.wait()  # Wait for process to terminate
        logger.info(f"Process stopped exit code {exit_code}")

    def kill_process(self):
        """Forcefully kill the subprocess (SIGKILL) and wait for it to die."""
        if not self.is_running():
            logger.info("Process is not running")
            return
        logger.info("Sending SIGKILL to the Process")
        self.process.kill()
        exit_code = self.process.wait()  # Wait for process to be killed
        logger.info(f"Process killed exit code {exit_code}")

    def is_running(self):
        """Return True iff the subprocess has been started and has not yet exited."""
        # Bug fix: the old `self.process and ...` returned None (not False)
        # before the first start; return a real bool.
        return self.process is not None and self.process.poll() is None
# Launch yourbench via `uv run`, pointing it at the saved config file.
command = ["uv", "run", "yourbench", f"--config={CONFIG_PATH}"]
# Single shared manager instance driving the "Run Generation" tab below.
manager = SubprocessManager(command)
def generate_config(hf_token, hf_org, model_name, provider, base_url, api_key, max_concurrent_requests):
    """Assemble the full YourBench pipeline configuration and render it as YAML.

    The single model described by the form inputs is registered in
    ``model_list`` and assigned to every pipeline role.
    """
    pipeline_roles = [
        "ingestion", "summarization", "single_shot_question_generation",
        "multi_hop_question_generation", "answer_generation", "judge_answers",
    ]

    hf_section = {
        "token": hf_token,
        "private": True,
        "hf_organization": hf_org,
    }

    model_entry = {
        "model_name": model_name,
        "provider": provider,
        "base_url": base_url,
        "api_key": api_key,
        "max_concurrent_requests": max_concurrent_requests,
    }

    answer_strategies = [
        {"name": "zeroshot", "prompt": "ZEROSHOT_QA_USER_PROMPT", "model_name": model_name},
        {"name": "gold", "prompt": "GOLD_QA_USER_PROMPT", "model_name": model_name},
    ]

    pipeline_section = {
        "ingestion": {
            "source_documents_dir": "/app/uploaded_files",
            "output_dir": "/app/ingested",
            "run": True,
        },
        "upload_ingest_to_hub": {
            "source_documents_dir": "/app/ingested",
            "hub_dataset_name": "test_ingested_documents",
            "local_dataset_path": "/app/ingested_dataset",
            "run": True,
        },
        "summarization": {
            "source_dataset_name": "test_ingested_documents",
            "output_dataset_name": "test_summaries",
            "local_dataset_path": "/results/test_summaries",
            "concat_existing_dataset": False,
            "run": True,
        },
        "chunking": {
            "source_dataset_name": "test_summaries",
            "output_dataset_name": "test_chunked_documents",
            "local_dataset_path": "/results/test_chunked_documents",
            "concat_existing_dataset": False,
            "chunking_configuration": {
                "l_min_tokens": 64,
                "l_max_tokens": 128,
                "tau_threshold": 0.3,
                "h_min": 2,
                "h_max": 4,
            },
            "run": True,
        },
        "single_shot_question_generation": {
            "source_dataset_name": "test_chunked_documents",
            "output_dataset_name": "test_single_shot_questions",
            "local_dataset_path": "/results/test_single_shot_questions",
            "diversification_seed": "24 year old adult",
            "concat_existing_dataset": False,
            "run": True,
        },
        "multi_hop_question_generation": {
            "source_dataset_name": "test_chunked_documents",
            "output_dataset_name": "test_multi_hop_questions",
            "local_dataset_path": "/results/test_multi_hop_questions",
            "concat_existing_dataset": False,
            "run": True,
        },
        "answer_generation": {
            "run": True,
            "question_dataset_name": "test_single_shot_questions",
            "output_dataset_name": "test_answered_questions",
            "local_dataset_path": "/results/test_answered_questions",
            "concat_existing_dataset": False,
            "strategies": answer_strategies,
        },
        "judge_answers": {
            "run": True,
            "source_judge_dataset_name": "test_answered_questions",
            "output_judged_dataset_name": "test_judged_comparisons",
            "local_dataset_path": "/results/test_judged_comparisons",
            "concat_existing_dataset": False,
            "comparing_strategies": [["zeroshot", "gold"]],
            "chunk_column_index": 0,
            "random_seed": 42,
        },
    }

    config = {
        "hf_configuration": hf_section,
        "model_list": [model_entry],
        # Every pipeline role is served by the same configured model.
        "model_roles": {role: [model_name] for role in pipeline_roles},
        # NOTE(review): this limit is hard-coded to 16 and ignores the
        # max_concurrent_requests form value — confirm that is intended.
        "inference_config": {"max_concurrent_requests": 16},
        "pipeline": pipeline_section,
    }
    return yaml.dump(config, default_flow_style=False)
def save_config(yaml_text):
    """Persist the (possibly hand-edited) YAML text to CONFIG_PATH and report success."""
    CONFIG_PATH.write_text(yaml_text)
    return "✅ Config saved!"
def save_files(files: list[str]):
    """Move each uploaded temp file into UPLOAD_DIRECTORY and report the destinations."""
    saved_paths = []
    for uploaded in files:
        source = pathlib.Path(uploaded)
        destination = UPLOAD_DIRECTORY / source.name
        saved_paths.append(shutil.move(str(source), str(destination)))
    return f"Files saved to: {', '.join(saved_paths)}"
# --- Gradio UI ---------------------------------------------------------------
app = gr.Blocks()
with app:
    gr.Markdown("## YourBench Configuration")

    with gr.Tab("Configuration"):
        # Form inputs feeding generate_config().
        hf_token = gr.Textbox(label="HF Token")
        hf_org = gr.Textbox(label="HF Organization")
        model_name = gr.Textbox(label="Model Name")
        provider = gr.Dropdown(["openrouter", "openai", "huggingface"], value="huggingface", label="Provider")
        base_url = gr.Textbox(label="Base URL")
        api_key = gr.Textbox(label="API Key")
        max_concurrent_requests = gr.Dropdown([8, 16, 32], value=16, label="Max Concurrent Requests")
        config_output = gr.Code(label="Generated Config", language="yaml")
        preview_button = gr.Button("Generate Config")
        save_button = gr.Button("Save Config")
        preview_button.click(
            generate_config,
            inputs=[hf_token, hf_org, model_name, provider, base_url, api_key, max_concurrent_requests],
            outputs=config_output,
        )
        save_button.click(save_config, inputs=[config_output], outputs=[gr.Textbox(label="Save Status")])

    with gr.Tab("Files"):
        file_input = gr.File(label="Upload text files", file_count="multiple", file_types=[".txt", ".md", ".html"])
        output = gr.Textbox(label="Log")
        file_input.upload(save_files, file_input, output)

    with gr.Tab("Run Generation"):
        log_output = gr.Code(label="Log Output", language=None, lines=20, interactive=False)
        start_button = gr.Button("Start Task")
        start_button.click(manager.start_process)
        # Poll the subprocess every 100 ms and stream captured output into the log view.
        timer = gr.Timer(0.1, active=True)
        timer.tick(manager.read_and_get_output, outputs=log_output)
        # Bug fix: this button previously reassigned (shadowed) `start_button`.
        kill_button = gr.Button("Kill Task")
        kill_button.click(manager.kill_process)

app.launch()