Spaces:

Artples
/

L-MChat-ZeroGPU

Running on Zero

L-MChat-ZeroGPU / app.py

Update app.py

dd389e3 verified 8 months ago

1.13 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	class UserRequest(BaseModel):
	    """Request body for the ``/generate/`` endpoint."""

	    # Text that is tokenized and handed to the model for generation.
	    prompt: str

	# Web application object the route handlers are registered on.
	app = FastAPI()

	# Identifier passed to both ``from_pretrained`` calls below.
	model_name = "Artples/L-MChat-7b"

	# All tensors and the model itself are kept on the CPU.
	device = torch.device("cpu")

	# Load the tokenizer first, then the model, and place the model on the device.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name)
	model.to(device)

	@app.post("/generate/")
	async def generate(request: UserRequest):
	    """Generate a completion for ``request.prompt`` and return it as JSON.

	    Args:
	        request: Parsed request body carrying the ``prompt`` string.

	    Returns:
	        A dict of the form ``{"response": <text>}``, where the text is the
	        first output sequence decoded with special tokens skipped.

	    Raises:
	        HTTPException: Status 500 with the error message as ``detail`` when
	            tokenization, generation or decoding raises.
	    """
	    # NOTE(review): ``model.generate`` is a synchronous call inside an async
	    # handler, so the event loop is occupied until generation returns.
	    try:
	        # Calling the tokenizer (instead of ``encode``) also yields the
	        # attention mask, which is handed to ``generate`` explicitly.
	        encoded = tokenizer(request.prompt, return_tensors="pt")
	        input_ids = encoded["input_ids"].to(device)
	        attention_mask = encoded["attention_mask"].to(device)

	        # ``max_new_tokens`` bounds only the continuation; ``max_length``
	        # would also count the prompt tokens, leaving long prompts with
	        # little or no room for a reply.
	        output = model.generate(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            max_new_tokens=100,
	            num_return_sequences=1,
	        )
	        response_text = tokenizer.decode(output[0], skip_special_tokens=True)
	    except Exception as e:
	        # Endpoint boundary: report any failure as a 500 and keep the
	        # original exception chained for server-side tracebacks.
	        raise HTTPException(status_code=500, detail=str(e)) from e

	    return {"response": response_text}

	if __name__ == "__main__":
	    # Executed as a script: serve the app with uvicorn on every interface.
	    import uvicorn

	    bind_host, bind_port = "0.0.0.0", 8000
	    uvicorn.run(app, host=bind_host, port=bind_port)