# training_space/app.py (FastAPI Backend)
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import subprocess
import os
import uuid
from huggingface_hub import HfApi, HfFolder
from fastapi.middleware.cors import CORSMiddleware
import logging
app = FastAPI()
# Configure Logging
logging.basicConfig(
    filename='training.log',
    filemode='a',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
# CORS Configuration
origins = [
"https://Vishwas1-LLMBuilderPro.hf.space", # Replace with your Gradio frontend Space URL
"http://localhost", # For local testing
"https://web.postman.co",
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Define the expected payload structure
class TrainingRequest(BaseModel):
    task: str  # 'generation' or 'classification'
    model_params: dict
    model_name: str
    dataset_name: str  # The name of the existing Hugging Face dataset
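# Illustrative request body for /train (a sketch only -- model_name and
# dataset_name are placeholders, and the model_params keys mirror the
# defaults read in the /train handler below):
# {
#     "task": "generation",
#     "model_name": "my-small-transformer",
#     "dataset_name": "username/my-dataset",
#     "model_params": {
#         "num_layers": 12,
#         "attention_heads": 1,
#         "hidden_size": 64,
#         "vocab_size": 30000,
#         "sequence_length": 512
#     }
# }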
# Root Endpoint
@app.get("/")
def read_root():
    return {
        "message": "Welcome to the Training Space API!",
        "instructions": "To train a model, send a POST request to /train with the required parameters."
    }
# Train Endpoint
@app.post("/train")
def train_model(request: TrainingRequest):
    try:
        logging.info(f"Received training request for model: {request.model_name}, Task: {request.task}")

        # Create a unique directory for this training session
        session_id = str(uuid.uuid4())
        session_dir = f"./training_sessions/{session_id}"
        os.makedirs(session_dir, exist_ok=True)

        # No need to save dataset content; use dataset_name directly
        dataset_name = request.dataset_name

        # Define the absolute path to train_model.py
        TRAIN_MODEL_PATH = os.path.join(os.path.dirname(__file__), "train_model.py")

        # Prepare the command to run the training script with dataset_name
        cmd = [
            "python", TRAIN_MODEL_PATH,
            "--task", request.task,
            "--model_name", request.model_name,
            "--dataset_name", dataset_name,  # Pass dataset_name instead of a dataset file path
            "--num_layers", str(request.model_params.get('num_layers', 12)),
            "--attention_heads", str(request.model_params.get('attention_heads', 1)),
            "--hidden_size", str(request.model_params.get('hidden_size', 64)),
            "--vocab_size", str(request.model_params.get('vocab_size', 30000)),
            "--sequence_length", str(request.model_params.get('sequence_length', 512))
        ]

        # Start the training script as a detached background process in the app directory.
        # Popen returns immediately; the request does not wait for training to finish.
        subprocess.Popen(cmd, cwd=os.path.dirname(__file__))

        logging.info(f"Training started for model: {request.model_name}, Session ID: {session_id}")
        return {"status": "Training started", "session_id": session_id}
    except Exception as e:
        logging.error(f"Error during training request: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
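# Example client call (an illustrative sketch, not part of the deployed app --
# it assumes the API is reachable at http://localhost:8000, e.g. when run
# locally with `uvicorn app:app --port 8000`), using a body like the example
# sketched above the TrainingRequest model:
#
#     import requests
#     payload = {"task": "generation", "model_name": "my-small-transformer",
#                "dataset_name": "username/my-dataset", "model_params": {}}
#     resp = requests.post("http://localhost:8000/train", json=payload)
#     print(resp.json())  # e.g. {"status": "Training started", "session_id": "<uuid>"}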
# Optional: Status Endpoint
@app.get("/status/{session_id}")
def get_status(session_id: str):
session_dir = f"./training_sessions/{session_id}"
log_file = os.path.join(session_dir, "training.log")
if not os.path.exists(log_file):
raise HTTPException(status_code=404, detail="Session ID not found.")
with open(log_file, "r", encoding="utf-8") as f:
logs = f.read()
return {"session_id": session_id, "logs": logs}