# Hugging Face Space (page status when captured: Sleeping)
import os

import gradio as gr
import spaces
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
# Hub checkpoint used for both the model weights and the tokenizer.
model_name = "teknium/OpenHermes-2.5-Mistral-7B"

# Hub access token, read from the environment; a missing 'hf_token'
# variable raises KeyError at import time (unchanged from the original).
token = os.environ['hf_token']

# AutoModelForCausalLM (not AutoModel) is required here: AutoModel loads only
# the bare transformer without a language-modeling head, so it cannot produce
# text. The token is now actually forwarded to the Hub download calls.
model = AutoModelForCausalLM.from_pretrained(model_name, token=token).cuda()
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
# System message sent ahead of the user's partial text in every request; it
# tells the model to propose short auto-completions rather than full answers.
system_prompt = '''You are given an input text for a chat interface. Propose auto-completion to the text. You have several roles:
- Fight under-specification: if the user does not provide sufficient context, propose them a set of relevant suggestions.
- Complete text: The text provided to you is in the making. If you have a good idea for how to complete - make suggestions.
Make sure the suggestions are valid completions of the text! No need for them to complete the text completely.
Suggest only up to 5 words ahead.
'''
def generate(text):
    """Ask the model for auto-completion suggestions for *text*.

    Builds a two-message chat (the module-level ``system_prompt`` followed by
    the user's text), renders it with the tokenizer's chat template, runs
    ``model.generate`` and decodes the result.

    Args:
        text: The partial user input to complete.

    Returns:
        The decoded string for the single sequence returned by
        ``model.generate``.
    """
    # NOTE(review): `spaces` is imported by this file but unused; if this
    # Space runs on ZeroGPU this function likely needs `@spaces.GPU` — confirm.
    data = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': text},
    ]

    # Upper bound on newly generated tokens. The prompt asks for at most five
    # words, so a small budget suffices; without an explicit bound the
    # library's default total-length cap is shorter than this prompt.
    max_new_tokens = 32

    # return_dict=True yields a mapping (input_ids, attention_mask) so it can
    # be **-unpacked into generate(); add_generation_prompt=True ends the
    # prompt with the assistant header so the model answers instead of
    # continuing the user turn. Inputs must live on the model's device.
    inputs = tokenizer.apply_chat_template(
        data,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors='pt',
    ).to(model.device)

    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Batch size is 1, so drop the batch dimension before decoding.
    # NOTE(review): the decoded text presumably still contains the prompt
    # tokens, as in the original; slice them off if only the completion is
    # wanted.
    return tokenizer.decode(output_ids.squeeze(0))