Spaces:
Runtime error
Runtime error
from fastapi import FastAPI, HTTPException | |
from pydantic import BaseModel | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
# FastAPI application instance for this module.
app = FastAPI()
class TextGenerationRequest(BaseModel):
    """Request body for one text-generation call."""

    # Text that is tokenized and handed to the model as the prompt.
    prompt: str
    # Passed through to ``model.generate`` as ``max_length``.
    max_length: int = 100
    # Passed through to ``model.generate`` as ``temperature``.
    temperature: float = 0.7
# Checkpoint shared by the tokenizer and the model below.
# NOTE(review): the "bnb-4bit" suffix suggests a bitsandbytes-quantized
# checkpoint -- confirm it can actually be loaded on a CPU-only host.
model_name = "unsloth/Qwen2.5-7B-bnb-4bit"

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Everything is pinned to the CPU, so the weights are requested as float32.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    torch_dtype=torch.float32,
    trust_remote_code=True,
)
@app.get("/")
def api_home():
    """Return the static welcome payload.

    Registered as ``GET /`` on ``app``; without a route decorator the
    function is defined but never reachable over HTTP.

    Returns:
        A dict with a single ``'detail'`` key holding the welcome message.
    """
    return {'detail': 'Welcome to FastAPI TextGen Tutorial!'}
@app.post("/generate")
async def generate_text(request: TextGenerationRequest):
    """Generate sampled text for ``request.prompt``.

    Registered as ``POST /generate`` on ``app``.
    NOTE(review): the route path is not recorded anywhere in this file --
    confirm it matches what clients call.

    Args:
        request: Prompt plus sampling settings. ``max_length`` and
            ``temperature`` are forwarded to ``model.generate`` unchanged.

    Returns:
        A dict with a single ``"generated_text"`` key holding the decoded
        first output sequence (special tokens stripped).

    Raises:
        HTTPException: 422 when the prompt is empty or when ``max_length``
            or ``temperature`` is not positive; 500 when tokenization,
            generation or decoding fails.
    """
    # Reject unusable input up front so the client gets a 4xx instead of
    # the generic 500 produced by the catch-all handler below.
    if not request.prompt:
        raise HTTPException(status_code=422, detail="prompt must not be empty")
    if request.max_length < 1:
        raise HTTPException(
            status_code=422, detail="max_length must be a positive integer"
        )
    if request.temperature <= 0:
        # Sampling is always enabled (do_sample=True below).
        raise HTTPException(
            status_code=422, detail="temperature must be greater than 0"
        )

    try:
        inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")
        # Unpacking forwards the attention mask together with the input ids
        # rather than the input ids alone.
        # NOTE: this call is synchronous and is not awaited, so it runs
        # directly inside this coroutine.
        outputs = model.generate(
            **inputs,
            max_length=request.max_length,
            temperature=request.temperature,
            do_sample=True,
        )
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Surface any failure as a 500 while keeping the original cause
        # attached for logs and debuggers.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"generated_text": generated_text}