Spaces:

eneSadi
/

cosmos-llama-flask

Sleeping

File size: 2,395 Bytes

219ad87
c65cfb2
 
eb9fa2c
ff9863c
7dea212
ff9863c
c65cfb2
aca3716
c65cfb2
 
 
32c2259
ff9863c
c65cfb2
aca3716
c65cfb2
ff9863c
 
dd79f4f
 
 
 
 
 
 
 
 
 
 
ff9863c
 
c65cfb2
aca3716
c65cfb2
 
 
 
 
 
 
 
 
219ad87
 
 
 
 
 
 
aca3716
ff9863c
 
 
aca3716
 
ff9863c
 
 
 
 
 
 
 
 
 
aca3716
 
c65cfb2
ff9863c
 
dd79f4f
ff9863c
 
 
 
 
 
c65cfb2
 
ff9863c
c65cfb2
219ad87

import threading

import torch
from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from transformers import AutoTokenizer, AutoModelForCausalLM

print("COSMOS Llama Chatbot is starting...")

# Checkpoint identifier handed to both from_pretrained() calls below.
model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"

print("Model loading started")
# Tokenizer and model are created once at import time and reused by every
# request handler in this module.
tokenizer = AutoTokenizer.from_pretrained(model_id)
_model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_id, **_model_load_options)
print("Model loading completed")

# This message can be changed; it is used as the chatbot's initial message
# (sent with the "system" role at the start of every conversation).
# The Turkish prompt asks the model to summarise a news text in 5 sentences
# and, for each sentence, to extract a keyword and any named entities
# (place, person, date), following the numbered output format shown in it.
initial_message = [
    {"role": "system", "content":
        """Kullanıcı sana bir haber metni verecek. Bu haber metninin önemli kısımlarını özetleyen 5 cümle çıkart. Aynı zamanda bu cümlelerin her birinden bir keyword extract et ve eğer varsa NER ile yer, kişi, tarih gibi alanları extract et. Yoksa karşısını boş bırak. Çıktıların şu formatta olsun:
    1. Cümle: Cumhurbaşkanı Erdoğan tatile çıktı.
    Keyword: tatil
    NER: Cumhurbaşkanı Erdoğan

    2. Cümle: ...
    Keyword: ...
    NER: ...
    """
     }
]

# Report which device torch would pick: CUDA when available, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Selected device:", device)

# FastAPI application instance; the route decorators below register on it.
app = FastAPI()


@app.get('/')
def home():
    """Return a fixed greeting payload for the root path."""
    greeting = {"hello": "Bitfumes"}
    return greeting


# Serializes tokenization and generation. Requests used to be processed one at
# a time because the blocking model call ran directly on the event loop; the
# lock keeps that one-at-a-time behaviour now that the work runs in worker
# threads, so concurrent requests do not compete for the tokenizer or model.
_generation_lock = threading.Lock()


def _generate_answer(prompt):
    """Generate the model's reply to *prompt* (blocking).

    Builds the conversation from ``initial_message`` plus the user prompt,
    samples up to 512 new tokens and returns the decoded completion with the
    prompt tokens and special tokens stripped.

    Args:
        prompt: The user-supplied text; it is formatted into a string before
            being placed in the conversation.

    Returns:
        The decoded text of the newly generated tokens.
    """
    # Copying the list is enough: only the list is extended, the shared
    # system-message dict itself is never modified.
    messages = initial_message.copy()
    messages.append({"role": "user", "content": f"{prompt}"})
    print("Messages:", messages)

    with _generation_lock:
        print("Tokenizer process started")
        # return_dict=True is requested explicitly so the attention mask is
        # available and the result type does not depend on a library default.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)
        input_ids = inputs["input_ids"]

        # Generation stops on the tokenizer's EOS token or on "<|eot_id|>"
        # (presumably the chat template's end-of-turn marker).
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]
        print("Tokenizer process completed")

        print("Model process started")
        outputs = model.generate(
            input_ids,
            attention_mask=inputs["attention_mask"],
            max_new_tokens=512,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
        )
        # The output starts with the prompt tokens; keep only what follows.
        response = outputs[0][input_ids.shape[-1]:]

        print("Tokenizer decode process started")
        return tokenizer.decode(response, skip_special_tokens=True)


@app.post('/ask')
async def ask(request: Request):
    """Answer the prompt contained in the JSON request body.

    Expects a JSON object with a non-empty ``"prompt"`` entry.

    Returns:
        ``{"answer": <generated text>}`` on success, or ``{"error": <reason>}``
        when the body is not valid JSON, is not a JSON object, or has no
        prompt.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report them instead of failing with a server error.
        return {"error": "Request body must be valid JSON"}
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number, null) has
        # no "prompt" key to look up.
        return {"error": "Request body must be a JSON object"}

    prompt = data.get("prompt")
    if not prompt:
        return {"error": "Prompt is missing"}

    print("Device of the model:", model.device)
    # Generation is blocking and slow; run it in a worker thread so the event
    # loop stays free to serve other requests in the meantime.
    answer = await run_in_threadpool(_generate_answer, prompt)

    return {"answer": answer}