from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import uvicorn

app = FastAPI()

# Model name (update with your actual model path on Hugging Face)
model_name = "waynebruce2110/GraveSocialAI"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=False)

# Load model in half precision (float16) on CPU to reduce memory usage
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    local_files_only=False,
    torch_dtype=torch.float16,  # Reduces memory usage
    device_map="cpu"  # Forces model to load on CPU
)

# Define input schema
class PromptInput(BaseModel):
    prompt: str

@app.get("/")
def read_root():
    return {"message": "GraveSocialAI API is running!"}

@app.post("/generate/")
def generate_text(data: PromptInput):
    inputs = tokenizer(data.prompt, return_tensors="pt").to("cpu")  # Ensure input is on CPU
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100)  # Cap generated tokens so long prompts don't eat the output budget
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": response}

# Ensure the app runs when executed
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
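# Example request (a sketch; assumes the server is running locally on port 7860
# and the model above has finished loading):
#
#   curl -X POST http://localhost:7860/generate/ \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello, world"}'
#
# Expected response shape: {"generated_text": "..."}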