Spaces:

eneSadi
/

cosmos-llama-flask

Sleeping

App Files Files Community

cosmos-llama-flask / app.py

eneSadi

activate NER

1751116 unverified 29 days ago

raw

history blame contribute delete

2.4 kB

	from fastapi import FastAPI, Request
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch

	# Announce start-up on stdout before the (potentially slow) load begins.
	print("COSMOS Llama Chatbot is starting...")

	# Identifier handed to both from_pretrained() calls below.
	model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"

	print("Model loading started")
	tokenizer = AutoTokenizer.from_pretrained(model_id)
	# Weights are requested as bfloat16; device placement is left to device_map="auto".
	model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
	print("Model loading completed")

	# This message can be changed; it is used as the chatbot's initial message.
	# It is a single "system" turn whose (Turkish) text asks the model to summarise
	# a news article in 5 sentences and, for each sentence, to extract a keyword
	# and any NER entities (place, person, date, ...), in the numbered format shown.
	# The /ask handler copies this list and appends the user's prompt to the copy.
	initial_message = [
	{"role": "system", "content":
	"""Kullanıcı sana bir haber metni verecek. Bu haber metninin önemli kısımlarını özetleyen 5 cümle çıkart. Aynı zamanda bu cümlelerin her birinden bir keyword extract et ve eğer varsa NER ile yer, kişi, tarih gibi alanları extract et. Yoksa karşısını boş bırak. Çıktıların şu formatta olsun:
	1. Cümle: Cumhurbaşkanı Erdoğan tatile çıktı.
	Keyword: tatil
	NER: Cumhurbaşkanı Erdoğan

	2. Cümle: ...
	Keyword: ...
	NER: ...
	"""
	}
	]

	# Pick the torch device to report: CUDA when torch sees a GPU, otherwise CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")
	print("Selected device:", device)

	# FastAPI application instance; the @app.get / @app.post decorators in this
	# file register their route handlers on it.
	app = FastAPI()


	@app.get('/')
	def home():
	    """Return the fixed greeting payload served for GET requests to '/'."""
	    greeting = {"hello": "Bitfumes"}
	    return greeting


	@app.post('/ask')
	async def ask(request: Request):
	    """Generate the model's answer for the prompt sent in a JSON request body.

	    The body is expected to be a JSON object with a "prompt" field. The prompt
	    is added as a user turn after the turns in ``initial_message``, rendered
	    with the tokenizer's chat template and passed to ``model.generate`` with
	    sampling enabled.

	    Args:
	        request: Incoming request whose body is parsed as JSON.

	    Returns:
	        ``{"answer": <decoded text>}`` on success, or ``{"error": <reason>}``
	        when the body is not valid JSON, is not a JSON object, or carries no
	        non-empty "prompt" value.
	    """
	    try:
	        data = await request.json()
	    except ValueError:
	        # Malformed/empty bodies are reported in the same shape as a missing
	        # prompt instead of surfacing as an unhandled exception.
	        return {"error": "Request body must be valid JSON"}
	    if not isinstance(data, dict):
	        # A JSON array/string/number has no .get(); reject it explicitly.
	        return {"error": "Request body must be a JSON object"}

	    prompt = data.get("prompt")
	    if not prompt:
	        return {"error": "Prompt is missing"}

	    print("Device of the model:", model.device)
	    # Build a fresh list per request so the shared initial_message list is
	    # never mutated.
	    messages = [*initial_message, {"role": "user", "content": str(prompt)}]

	    print("Messages:", messages)
	    print("Tokenizer process started")
	    input_ids = tokenizer.apply_chat_template(
	        messages,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    ).to(model.device)

	    # Generation stops on either the tokenizer's EOS id or the id of the
	    # "<|eot_id|>" token. The token string must be spelled without
	    # backslashes, otherwise the lookup does not match the vocabulary entry.
	    terminators = [
	        tokenizer.eos_token_id,
	        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
	    ]
	    print("Tokenizer process completed")

	    print("Model process started")
	    # NOTE(review): this is a synchronous call inside an async handler, so it
	    # presumably occupies the event loop while generating - confirm whether
	    # that is acceptable for this deployment.
	    outputs = model.generate(
	        input_ids,
	        max_new_tokens=512,
	        eos_token_id=terminators,
	        do_sample=True,
	        temperature=0.6,
	        top_p=0.9,
	    )
	    # Skip the first input_ids.shape[-1] positions so that only the tokens
	    # following the prompt are decoded.
	    response = outputs[0][input_ids.shape[-1]:]

	    print("Tokenizer decode process started")
	    answer = tokenizer.decode(response, skip_special_tokens=True)

	    return {"answer": answer}