from fastapi import FastAPI
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Inputs are moved to the CPU explicitly; the models themselves are placed by device_map="auto".
device = "cpu"

# A single tokenizer (loaded from the 0.5B checkpoint) is reused for both models.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# Smaller 0.5B-parameter model.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    device_map="auto"
)

# Larger 1.5B-parameter model.
model1 = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-1.5B-Instruct",
    device_map="auto"
)

app = FastAPI()
@app.get("/")
async def read_root():
    # Simple health-check / landing route.
    return {"Hello": "World!"}
# NOTE: the route path "/model" is assumed; the decorator was missing from the source.
@app.post("/model")
async def run_model(data: dict):
    # Handler renamed from `model` so it does not shadow the 0.5B model object above.
    prompt = data.get("prompt")
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": prompt}
    ]
    # Build the chat-formatted prompt string expected by the Qwen2 Instruct models.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=64,
        do_sample=True
    )
    # Drop the prompt tokens so only the newly generated completion is decoded.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
# NOTE: the route path "/model1" is assumed; the decorator was missing from the source.
@app.post("/model1")
async def run_model1(data: dict):
    # Handler renamed from `model1` so it does not shadow the 1.5B model object above.
    prompt = data.get("prompt")
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    # This route is assumed to target the larger 1.5B model, hence model1.generate.
    generated_ids = model1.generate(
        model_inputs.input_ids,
        max_new_tokens=64,
        do_sample=True
    )
    # Drop the prompt tokens so only the newly generated completion is decoded.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
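
A minimal client sketch for exercising the two endpoints once the server is running. It assumes the app above is saved as app.py, served with uvicorn app:app --port 8000, and that the assumed /model and /model1 paths from the decorators are kept; adjust the base URL and paths to match the actual deployment.

# client.py -- separate script, not part of the server file above.
import requests

BASE_URL = "http://localhost:8000"  # assumed local uvicorn address
payload = {"prompt": "Summarise what you can do in one sentence."}

# Ask the 0.5B model.
print("0.5B:", requests.post(f"{BASE_URL}/model", json=payload, timeout=120).json())

# Ask the 1.5B model.
print("1.5B:", requests.post(f"{BASE_URL}/model1", json=payload, timeout=120).json())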