Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import subprocess | |
from pathlib import Path | |
import time | |
import requests | |
def check_services(timeout=5):
    """Check if all required services are running.

    Sends an HTTP GET to each service's local endpoint, in order, and stops
    at the first one that cannot be reached.

    Args:
        timeout: Seconds to wait for each service before treating it as
            not running. Without a timeout a service that accepts the
            connection but never responds would block the caller forever.

    Returns:
        A ``(ok, message)`` tuple: ``(True, "All services are running")``
        when every endpoint answered, otherwise
        ``(False, "<name> is not running")`` for the first failing service.
    """
    services = [
        ("Controller", "http://localhost:21001"),
        ("API Server", "http://localhost:8000"),
        ("Model Worker", "http://localhost:8080"),
    ]
    for service_name, url in services:
        try:
            # Any HTTP response (even an error status) counts as "running";
            # only a transport-level failure marks the service as down.
            requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException:
            # Covers ConnectionError as well as Timeout and other request
            # failures, so callers always get a tuple instead of an exception.
            return False, f"{service_name} is not running"
        else:
            print(f"{service_name} is running")
    return True, "All services are running"
def check_training_status():
    """Return a human-readable summary of training progress.

    If any backend service is down, the message from ``check_services`` is
    returned unchanged. Otherwise the result reflects whether
    ``/app/results`` exists and how many ``iter_*`` entries it contains.
    """
    # Progress is only reported while every service is up.
    ok, status_msg = check_services()
    if not ok:
        return status_msg

    output_root = Path("/app/results")
    if output_root.exists():
        # One "iter_*" entry per finished iteration.
        finished = sum(1 for _ in output_root.glob("iter_*"))
        return f"Completed {finished} training iterations."
    return "Training hasn't started yet."
def _positive_int(value):
    """Return *value* as an int >= 1, or None if it cannot be one."""
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric text, NaN and infinity all end up here.
        return None
    return number if number >= 1 else None


def start_training(model_path, instruct_count, max_iter):
    """Run the training script and return a status message.

    The settings are exported as the ``MODEL_PATH``, ``INSTRUCT_COUNT`` and
    ``MAX_ITER`` environment variables, then ``bash run.sh`` is executed in
    ``/app/qwen``. The call blocks until the script exits.

    Args:
        model_path: Path to the base model; must be a non-empty string.
        instruct_count: Number of instructions to generate (positive).
        max_iter: Number of training iterations (positive).

    Returns:
        A message describing success, an input-validation error, an
        unavailable service, or a failure while running the script.
    """
    # Validate the cheap local inputs before probing the network services.
    if not model_path or not str(model_path).strip():
        return "Error: model path must not be empty."
    # Numeric UI fields may deliver floats (e.g. 5000.0) or None; coerce to
    # int so the script never sees "5000.0" or "None".
    instruct_total = _positive_int(instruct_count)
    if instruct_total is None:
        return "Error: instruction count must be a positive whole number."
    iteration_limit = _positive_int(max_iter)
    if iteration_limit is None:
        return "Error: max iterations must be a positive whole number."

    # Check if services are running
    services_ok, message = check_services()
    if not services_ok:
        return message

    os.environ["MODEL_PATH"] = model_path
    os.environ["INSTRUCT_COUNT"] = str(instruct_total)
    os.environ["MAX_ITER"] = str(iteration_limit)
    try:
        subprocess.run(["bash", "run.sh"], check=True, cwd="/app/qwen")
    except subprocess.CalledProcessError as e:
        return f"Error during training: {str(e)}"
    except OSError as e:
        # Raised when bash or the working directory is missing; report it
        # instead of letting the exception escape to the UI.
        return f"Error during training: {e}"
    return "Training completed successfully!"
# Create the interface: training settings on the left, status on the right.
with gr.Blocks() as iface:
    gr.Markdown("# Self-Lengthen Training Interface")
    with gr.Row():
        with gr.Column():
            model_path = gr.Textbox(
                label="Model Path",
                value="/app/models/base_model",
                info="Path to the base model",
            )
            # precision=0 makes the component deliver whole numbers, since
            # both fields are counts that are exported to the training script.
            instruct_count = gr.Number(
                label="Instruction Count",
                value=5000,
                minimum=100,
                precision=0,
                info="Number of instructions to generate",
            )
            max_iter = gr.Number(
                label="Max Iterations",
                value=3,
                minimum=1,
                precision=0,
                info="Number of training iterations",
            )
            train_btn = gr.Button("Start Training")
        with gr.Column():
            # Read-only box that both buttons write their result into.
            status_output = gr.Textbox(
                label="Status",
                value="Ready to start training...",
                interactive=False,
            )
            refresh_btn = gr.Button("Refresh Status")
    train_btn.click(
        fn=start_training,
        inputs=[model_path, instruct_count, max_iter],
        outputs=status_output,
    )
    refresh_btn.click(
        fn=check_training_status,
        inputs=None,
        outputs=status_output,
    )
if __name__ == "__main__":
    # Hold back the web UI until every backend service responds,
    # re-checking every 5 seconds and printing which service is missing.
    print("Waiting for services to start...")
    services_ok, message = check_services()
    while not services_ok:
        print(message)
        time.sleep(5)
        services_ok, message = check_services()
    print("All services are running, starting web interface...")
    # Listen on all interfaces on port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)