Spaces:

dar-tau
/

run_inference

Sleeping

dar-tau committed on Jun 8, 2024

Commit

6b3281f

verified ·

1 Parent(s): 497a54c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,6 +21,10 @@ prompt_format = '''<|im_start|>system
 <|im_start|>assistant
 '''
 system_prompt = '''You are given a partial input text for another AI chat interface.
 Propose auto-completion to the text. You have several roles:
@@ -89,12 +93,12 @@ def detach_past_kv(past_kv):
 @spaces.GPU
 def set_past_key_values():
     model, tokenizer = pipe.model, pipe.tokenizer
-    tokenized = tokenizer.apply_chat_template(start_messages, return_tensors='pt')
     # Check that this is indeed a prefix of the entire message
-    test_messages = [*start_messages, {'role': 'user', 'content': 'Hello World!'}]
-    tokenized_test = tokenizer.apply_chat_template(test_messages, return_tensors='pt')
-    assert (tokenized_test[:, :tokenized.shape[1]] == tokenized).all().cpu().item()
     return detach_past_kv(model(tokenized.to(model.device)).past_key_values)
@@ -118,8 +122,7 @@ def generate(text, past_key_values):
 if __name__ == "__main__":
     with torch.no_grad():
-        # past_key_values = set_past_key_values()
-        # print(f'{past_key_values=}')
-        demo = gr.Interface(partial(generate, past_key_values=None),
                             inputs="textbox", outputs="textbox")
         demo.launch()

 <|im_start|>assistant
 '''
+system_only_prompt_format = '''<|im_start|>system
+{system_message}<|im_end|>
+<|im_start|>user
+'''
 system_prompt = '''You are given a partial input text for another AI chat interface.
 Propose auto-completion to the text. You have several roles:
 @spaces.GPU
 def set_past_key_values():
     model, tokenizer = pipe.model, pipe.tokenizer
+    tokenized = tokenizer(system_only_prompt_format.format(system_message=system_prompt))
+    # tokenized = tokenizer.apply_chat_template(start_messages, return_tensors='pt')
     # Check that this is indeed a prefix of the entire message
+    # test_messages = [*start_messages, {'role': 'user', 'content': 'Hello World!'}]
+    # tokenized_test = tokenizer.apply_chat_template(test_messages, return_tensors='pt')
+    # assert (tokenized_test[:, :tokenized.shape[1]] == tokenized).all().cpu().item()
     return detach_past_kv(model(tokenized.to(model.device)).past_key_values)
 if __name__ == "__main__":
     with torch.no_grad():
+        past_key_values = set_past_key_values()
+        demo = gr.Interface(partial(generate, past_key_values=past_key_values),
                             inputs="textbox", outputs="textbox")
         demo.launch()