Spaces:

eneSadi
/

cosmos-llama-flask

Sleeping

File size: 2,117 Bytes

219ad87
c65cfb2
 
eb9fa2c
0235536
7dea212
0235536
c65cfb2
aca3716
c65cfb2
 
 
629dd02
0235536
c65cfb2
aca3716
c65cfb2
0235536
 
6227a05
 
0235536
 
c65cfb2
aca3716
c65cfb2
 
 
 
 
 
 
 
 
219ad87
 
 
 
 
 
 
aca3716
0235536
 
 
aca3716
 
0235536
 
 
 
 
 
 
 
 
 
aca3716
 
c65cfb2
0235536
 
6227a05
0235536
 
 
 
 
 
c65cfb2
 
0235536
c65cfb2
219ad87

from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

print("COSMOS Llama Chatbot is starting...")

model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"

print("Model loading started")
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
print("Model loading completed")

# bu mesaj değiştirilebilir ve chatbotun başlangıç mesajı olarak kullanılabilir
initial_message = [
    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
    # Görevi yerine getirirken adım adım düşün ve adımlarını gerekçelendir.
]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Selected device:", device)

app = FastAPI()


@app.get('/')
def home():
    return {"hello": "Bitfumes"}


@app.post('/ask')
async def ask(request: Request):
    data = await request.json()
    prompt = data.get("prompt")
    if not prompt:
        return {"error": "Prompt is missing"}

    print("Device of the model:", model.device)
    messages = initial_message.copy()
    messages.append({"role": "user", "content": f"{prompt}"})

    print("Messages:", messages)
    print("Tokenizer process started")
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    print("Tokenizer process completed")

    print("Model process started")
    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    response = outputs[0][input_ids.shape[-1]:]

    print("Tokenizer decode process started")
    answer = tokenizer.decode(response, skip_special_tokens=True)

    return {"answer": answer}