import os

# Set the Hugging Face cache directory *before* importing transformers so the
# setting is picked up at import time; this avoids permission issues in Docker.
# (Newer transformers releases read HF_HOME; TRANSFORMERS_CACHE is kept for
# compatibility with older versions, which treat it as deprecated.)
os.environ["HF_HOME"] = "/app/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/huggingface_cache"

import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
app = FastAPI()


class TextGenerationRequest(BaseModel):
    prompt: str
    max_length: int = 100
    temperature: float = 0.7
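# Example request body (defaults shown for the optional fields):
#   {"prompt": "Once upon a time", "max_length": 100, "temperature": 0.7}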
# Load model and tokenizer (force CPU)
model_name = "unsloth/Qwen2.5-7B-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # Use float32 for CPU
    device_map={"": "cpu"},  # Ensure CPU usage
)
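# Caveat: this checkpoint is pre-quantized with bitsandbytes 4-bit, and
# bitsandbytes generally expects a CUDA GPU, so loading it on a CPU-only
# machine may fail at startup. For CPU inference, a full-precision checkpoint
# (e.g. "Qwen/Qwen2.5-7B" -- a suggested substitution, not from the original
# code) would be the more usual choice.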
@app.get("/")
def api_home():
    return {"detail": "Welcome to FastAPI TextGen API!"}
@app.post("/generate")  # endpoint path is an assumption; adjust as needed
async def generate_text(request: TextGenerationRequest):
    try:
        inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")  # Ensure CPU usage
        outputs = model.generate(
            inputs.input_ids,
            max_length=request.max_length,  # counts prompt + generated tokens; max_new_tokens is an alternative
            temperature=request.temperature,
            do_sample=True,
        )
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return {"generated_text": generated_text}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
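
# Optional local entrypoint: a minimal sketch assuming uvicorn is installed.
# On Hugging Face Spaces the server is typically started by the Docker CMD
# (e.g. `uvicorn app:app`), so this guard only matters for local debugging.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)  # 7860 is the default Spaces port

# Example call once the server is running (route /generate as declared above):
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello"}'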