Spaces:
Sleeping
Sleeping
File size: 1,064 Bytes
d0d12ff 38ede89 d0d12ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import os

import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer, pipeline
# Hugging Face Hub identifier of the checkpoint loaded into the pipeline.
model_name = "teknium/OpenHermes-2.5-Mistral-7B"

# Hub access token taken from the `hf_token` environment variable. Read with
# `.get` so a missing variable yields None (anonymous access) instead of a
# KeyError at import time.
token = os.environ.get('hf_token')

# Text-generation pipeline; `pipe.tokenizer` and `pipe.model` are the loaded
# tokenizer and model objects. `token=None` means no authentication is sent.
pipe = pipeline("text-generation", model=model_name, device="cuda", token=token)

# System message placed ahead of the user's text in every chat request.
system_prompt = '''You are given an input text for a chat interface. Propose auto-completion to the text. You have several roles:
- Fight under-specification: if the user does not provide sufficient context, propose them a set of relevant suggestions.
- Complete text: The text provided to you is in the making. If you have a good idea for how to complete - make suggestions.
Make sure the suggestions are valid completions of the text! No need for them to complete the text completely.
Suggest only up to 5 words ahead.
'''
@spaces.GPU
def generate(text: str, max_new_tokens: int = 32) -> str:
    """Return the model's proposed completion for ``text``.

    The system prompt and ``text`` are rendered with the tokenizer's chat
    template, passed to the model held by the module-level ``pipe``, and the
    newly generated tokens are decoded back to a string.

    Args:
        text: The partially typed user input to complete.
        max_new_tokens: Upper bound on the number of tokens generated.

    Returns:
        The decoded continuation only; the prompt tokens and special tokens
        are not included.
    """
    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': text},
    ]
    # Reuse the tokenizer/model already loaded by the module-level pipeline.
    tokenizer = pipe.tokenizer
    model = pipe.model

    # return_dict=True yields a mapping (input_ids, attention_mask) so it can
    # be unpacked with ** into generate(); add_generation_prompt appends the
    # assistant-turn header so the model answers rather than continuing the
    # user's turn.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors='pt',
    ).to(model.device)  # inputs must live on the same device as the model

    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns prompt + continuation; drop the prompt part.
    prompt_length = inputs['input_ids'].shape[-1]
    return tokenizer.decode(
        output_ids[0, prompt_length:],
        skip_special_tokens=True,
    )