rphrp1985 committed
Commit 5469709 · verified · 1 Parent(s): 6ac645f

Update app.py

Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -77,8 +77,8 @@ def respond(
     messages = [{"role": "user", "content": "Hello, how are you?"}]
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
     ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
-
-    gen_tokens = model.generate(
+    with autocast():
+        gen_tokens = model.generate(
     input_ids,
     max_new_tokens=100,
     # do_sample=True,
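
The change wraps the generate() call in PyTorch's autocast context manager so the forward passes run under automatic mixed precision on the GPU. Below is a minimal sketch of how the surrounding code in app.py plausibly fits together, assuming autocast is imported from torch.cuda.amp and using a hypothetical Cohere-style chat checkpoint (the chat-template tokens in the comment above resemble Command R, but the actual model id and imports are not visible in this diff):

from torch.cuda.amp import autocast
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical checkpoint for illustration only; the real model id used by app.py is not shown in this diff.
model_id = "CohereForAI/c4ai-command-r-v01"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to("cuda")

messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to("cuda")

# Run generation under autocast so GPU matmuls execute in reduced precision.
with autocast():
    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=100,
        # do_sample=True,
    )

print(tokenizer.decode(gen_tokens[0], skip_special_tokens=True))

Wrapping only the generation call keeps the rest of respond() in full precision while letting the decode loop benefit from mixed precision.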