jedick committed on
Commit
f42e9e5
·
1 Parent(s): 09d7140

Add padding="longest"

Browse files
Files changed (2) hide show
  1. main.py +3 -0
  2. pipeline.py +0 -1
main.py CHANGED
@@ -164,6 +164,9 @@ def GetChatModel(compute_mode, ckpt_dir=None):
164
  return_full_text=False,
165
  # It seems that max_new_tokens has to be specified here, not in .invoke()
166
  max_new_tokens=2000,
 
 
 
167
  )
168
  # We need the task so HuggingFacePipeline can deal with our class
169
  pipe.task = "text-generation"
 
164
  return_full_text=False,
165
  # It seems that max_new_tokens has to be specified here, not in .invoke()
166
  max_new_tokens=2000,
167
+ # Use padding for FlashAttention alignment
168
+ # https://github.com/google-deepmind/gemma/issues/169
169
+ padding="longest",
170
  )
171
  # We need the task so HuggingFacePipeline can deal with our class
172
  pipe.task = "text-generation"
pipeline.py CHANGED
@@ -22,7 +22,6 @@ class MyTextGenerationPipeline(TextGenerationPipeline):
22
  continue_final_message=None,
23
  **generate_kwargs,
24
  ):
25
- print(f"PADDING: {padding}")
26
  # Only set non-None tokenizer kwargs, so as to rely on the tokenizer's defaults
27
  tokenizer_kwargs = {
28
  "add_special_tokens": add_special_tokens,
 
22
  continue_final_message=None,
23
  **generate_kwargs,
24
  ):
 
25
  # Only set non-None tokenizer kwargs, so as to rely on the tokenizer's defaults
26
  tokenizer_kwargs = {
27
  "add_special_tokens": add_special_tokens,