Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,8 +19,36 @@ system_prompt = '''You are given a partial input text for a chat interface. Prop
|
|
19 |
Don't suggest anything if there are no good suggestions.
|
20 |
Make sure the suggestions are valid completions of the text! No need for them to complete the text completely.
|
21 |
Suggest only up to 5 words ahead. The scheme of your answer should be "answer1;answer2;answer3" (return between 0 and 4 answers).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
'''
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
@spaces.GPU
|
25 |
def generate(text):
|
26 |
messages = [
|
@@ -32,5 +60,6 @@ def generate(text):
|
|
32 |
|
33 |
|
34 |
if __name__ == "__main__":
|
|
|
35 |
demo = gr.Interface(generate, inputs="textbox", outputs="textbox")
|
36 |
demo.launch()
|
|
|
19 |
Don't suggest anything if there are no good suggestions.
|
20 |
Make sure the suggestions are valid completions of the text! No need for them to complete the text completely.
|
21 |
Suggest only up to 5 words ahead. The scheme of your answer should be "answer1;answer2;answer3" (return between 0 and 4 answers).
|
22 |
+
Answers should be only the completions themselves.
|
23 |
+
|
24 |
+
Examples:
|
25 |
+
(1)
|
26 |
+
User: "Help me write a sentiment analysis pipeline"
|
27 |
+
Assistant: "using huggingface;using NLTK;using python"
|
28 |
+
|
29 |
+
(2)
|
30 |
+
User: "My name is"
|
31 |
+
Assistant: "" (nothing much to contribute at this point. return nothing)
|
32 |
+
|
33 |
+
(3)
|
34 |
+
User: "Help me find a present for my"
|
35 |
+
Assistant: "girlfriend;mother;father;friend"
|
36 |
'''
|
37 |
|
38 |
+
|
39 |
+
@spaces.GPU
|
40 |
+
def get_past_key_values(system_prompt):
|
41 |
+
messages = [{'role': 'system', 'content': system}]
|
42 |
+
tokenized = tokenizer.apply_chat_template(messages, return_tensors='pt')
|
43 |
+
|
44 |
+
# assert that this is indeed a prefix (TODO: make sure this is robust)
|
45 |
+
messages.append({'role': 'user', 'content': 'TEST'})
|
46 |
+
tokenized_test = tokenizer.apply_chat_template(messages, return_tensors='pt')
|
47 |
+
assert (tokenized_test[:, :tokenized.shape[1]] == tokenized).all().cpu().item()
|
48 |
+
|
49 |
+
return model(**tokenized.to(model.device)).past_key_values
|
50 |
+
|
51 |
+
|
52 |
@spaces.GPU
|
53 |
def generate(text):
|
54 |
messages = [
|
|
|
60 |
|
61 |
|
62 |
if __name__ == "__main__":
|
63 |
+
past_key_values = get_past_key_values(system_prompt)
|
64 |
demo = gr.Interface(generate, inputs="textbox", outputs="textbox")
|
65 |
demo.launch()
|