Spaces:
Sleeping
Sleeping
from fastapi import FastAPI | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
# Module-level FastAPI application object for this file.
app = FastAPI()
def greet_json():
    """Return the fixed greeting payload ``{"Hello": "World!"}``."""
    # NOTE(review): no route decorator is visible on this function in this
    # view -- confirm how (or whether) it is registered on the app.
    greeting = {"Hello": "World!"}
    return greeting
# Process-wide cache so the Gemma tokenizer/model are loaded once instead of
# on every call to say_hello().
_GEMMA_CACHE = {}


def _load_gemma():
    """Return the cached ``(tokenizer, model)`` pair, loading it on first use."""
    if "pair" not in _GEMMA_CACHE:
        tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
        model = AutoModelForCausalLM.from_pretrained(
            "google/gemma-2b-it",
            device_map="auto",
            torch_dtype=torch.bfloat16,
        )
        _GEMMA_CACHE["pair"] = (tokenizer, model)
    return _GEMMA_CACHE["pair"]


def say_hello(msg: str):
    """Generate a reply to *msg* with ``google/gemma-2b-it``.

    Args:
        msg: Prompt text; it is tokenized and passed to ``model.generate``.

    Returns:
        A dict ``{"message": <text>}`` where the text is the first generated
        sequence decoded with the tokenizer's default ``decode`` settings.

    Progress markers are printed to stdout at each stage.
    """
    print("model")
    tokenizer, model = _load_gemma()

    print("token & msg")
    # The model is loaded with device_map="auto", so it is not guaranteed to
    # live on the CPU; put the inputs on whatever device the model reports to
    # avoid a device mismatch inside generate().
    encoded = tokenizer(msg, return_tensors="pt").to(model.device)

    print("output")
    outputs = model.generate(**encoded, max_length=500)

    print("complete")
    return {"message": tokenizer.decode(outputs[0])}