from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import uvicorn
app = FastAPI()
# Model name (update with your actual model path on Hugging Face)
model_name = "waynebruce2110/GraveSocialAI"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=False)
# Load the model in half precision (float16) on CPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    local_files_only=False,
    torch_dtype=torch.float16,  # Half precision reduces memory usage
    device_map="cpu"            # Forces the model to load on CPU
)
# Define input schema
class PromptInput(BaseModel):
    prompt: str
@app.get("/")
def read_root():
return {"message": "GraveSocialAI API is running!"}
@app.post("/generate/")
def generate_text(data: PromptInput):
    inputs = tokenizer(data.prompt, return_tensors="pt").to("cpu")  # Ensure input is on CPU
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=100)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": response}
# Run the app with Uvicorn when this file is executed directly
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
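
# Example request once the server is running (illustrative; adjust host and port to your setup):
#   curl -X POST http://localhost:7860/generate/ \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Write a short greeting."}'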