from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

class UserRequest(BaseModel):
    prompt: str

app = FastAPI()

# Load the model and tokenizer once at startup
model_name = "Artples/L-MChat-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Make sure the model is on CPU and in inference mode
device = torch.device("cpu")
model.to(device)
model.eval()

@app.post("/generate/")
def generate(request: UserRequest):
    # A plain `def` endpoint lets FastAPI run the blocking model.generate()
    # call in its threadpool instead of stalling the async event loop.
    try:
        # Tokenize the prompt (returns input_ids and attention_mask)
        inputs = tokenizer(request.prompt, return_tensors="pt").to(device)

        # Generate a response; inference_mode() skips autograd bookkeeping.
        # max_new_tokens bounds only the continuation, not prompt + continuation.
        with torch.inference_mode():
            output = model.generate(**inputs, max_new_tokens=100, num_return_sequences=1)

        # Decode only the newly generated tokens, not the echoed prompt
        new_tokens = output[0][inputs["input_ids"].shape[-1]:]
        response_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

        return {"response": response_text}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
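
With the server running, the endpoint can be exercised from any HTTP client. Below is a minimal client sketch using the `requests` library (an assumption; curl or any other client works the same way). The URL assumes the server is running locally on port 8000 as configured above, and the prompt text is an arbitrary example.

import requests

# Post a prompt to the /generate/ endpoint and print the generated reply.
# The JSON body must match the UserRequest model: {"prompt": "..."}.
resp = requests.post(
    "http://localhost:8000/generate/",
    json={"prompt": "Tell me a fun fact about the moon."},
)
resp.raise_for_status()
print(resp.json()["response"])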