File size: 2,658 Bytes
0fc7538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728e771
0fc7538
 
 
 
2ae8beb
0fc7538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728e771
0fc7538
 
 
 
2ae8beb
0fc7538
 
 
 
 
 
 
728e771
0fc7538
 
 
 
 
 
 
 
 
728e771
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from fastapi import FastAPI, HTTPException
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer
# Torch device name configured for this service. Note that the models below
# are placed via device_map="auto", which is decided independently of this
# value.
device = "cpu"

# The only tokenizer in this module, loaded from the 0.5B checkpoint.
# NOTE(review): presumably also valid for the 1.5B checkpoint loaded below
# (same model family) -- confirm.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# Smaller causal LM; weights are loaded once at import time.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    device_map="auto"
)

# Larger causal LM, loaded alongside the smaller one at import time.
model1 = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-1.5B-Instruct",
    device_map="auto"
)

# ASGI application object on which the routes in this module are registered.
app = FastAPI()

@app.get("/")
async def read_root():
    """Serve a fixed greeting payload at the root path."""
    greeting = {"Hello": "World!"}
    return greeting

def modelResp(promt):
    """Generate a short chat reply to a user prompt with the 0.5B model.

    The prompt becomes the final user turn after a fixed system message and
    one example exchange. The conversation is rendered with the tokenizer's
    chat template and completed by ``model.generate`` with sampling enabled
    and at most 64 new tokens.

    Args:
        promt: The user's message. The misspelled parameter name is kept so
            the signature stays unchanged for existing callers. The value is
            interpolated with an f-string, so non-string values are
            stringified.

    Returns:
        The decoded completion only (prompt tokens removed), with special
        tokens skipped.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        # Previously read an undefined name ``prompt`` here, which raised
        # NameError on every call.
        {"role": "user", "content": f"{promt}"}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Put the inputs on the device the weights actually live on; with
    # device_map="auto" that is not guaranteed to be the module-level device.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Pass the whole encoding so the attention mask travels with input_ids.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=64,
        do_sample=True
    )
    # generate() returns prompt + completion; keep only the newly generated tail.
    completion_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

    return response

def modelResp1(promt):
    """Generate a short chat reply to a user prompt with the 1.5B model.

    The prompt becomes the final user turn after a fixed system message and
    one example exchange. The conversation is rendered with the tokenizer's
    chat template and completed by ``model1.generate`` with sampling enabled
    and at most 64 new tokens.

    Args:
        promt: The user's message. The misspelled parameter name is kept so
            the signature stays unchanged for existing callers. The value is
            interpolated with an f-string, so non-string values are
            stringified.

    Returns:
        The decoded completion only (prompt tokens removed), with special
        tokens skipped.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        # Previously read an undefined name ``prompt`` here, which raised
        # NameError on every call.
        {"role": "user", "content": f"{promt}"}
    ]
    # NOTE(review): this reuses the tokenizer loaded from the 0.5B checkpoint;
    # presumably the 1.5B checkpoint shares its vocabulary and chat template --
    # confirm.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Put the inputs on the device the weights actually live on; with
    # device_map="auto" that is not guaranteed to be the module-level device.
    model_inputs = tokenizer([text], return_tensors="pt").to(model1.device)
    # Pass the whole encoding so the attention mask travels with input_ids.
    generated_ids = model1.generate(
        **model_inputs,
        max_new_tokens=64,
        do_sample=True
    )
    # generate() returns prompt + completion; keep only the newly generated tail.
    completion_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

    return response
    
@app.post("/modelapi")
async def modelApi(data: dict):
    """Return the 0.5B model's reply to the ``prompt`` field of the JSON body.

    Args:
        data: Parsed JSON request body; the ``"prompt"`` key carries the
            user's message.

    Returns:
        The string produced by ``modelResp`` for that prompt.

    Raises:
        HTTPException: 422 when ``prompt`` is missing, null, or a blank
            string.
    """
    prompt = data.get("prompt")
    # Reject absent/blank prompts up front rather than sending the model an
    # empty turn (or the text "None").
    if prompt is None or (isinstance(prompt, str) and not prompt.strip()):
        raise HTTPException(
            status_code=422,
            detail="Request body must contain a non-empty 'prompt'.",
        )
    # NOTE(review): modelResp is a blocking call, so this async handler holds
    # the event loop while it generates -- consider offloading it to a worker
    # thread if concurrent requests matter.
    response = modelResp(prompt)
    return response

@app.post("/modelapi1")
async def modelApi1(data: dict):
    """Return the 1.5B model's reply to the ``prompt`` field of the JSON body.

    Args:
        data: Parsed JSON request body; the ``"prompt"`` key carries the
            user's message.

    Returns:
        The string produced by ``modelResp1`` for that prompt.

    Raises:
        HTTPException: 422 when ``prompt`` is missing, null, or a blank
            string.
    """
    prompt = data.get("prompt")
    # Reject absent/blank prompts up front rather than sending the model an
    # empty turn (or the text "None").
    if prompt is None or (isinstance(prompt, str) and not prompt.strip()):
        raise HTTPException(
            status_code=422,
            detail="Request body must contain a non-empty 'prompt'.",
        )
    # NOTE(review): modelResp1 is a blocking call, so this async handler holds
    # the event loop while it generates -- consider offloading it to a worker
    # thread if concurrent requests matter.
    response = modelResp1(prompt)
    return response