Spaces:
Running
on
Zero
Running
on
Zero
jedick
committed on
Commit
·
f42e9e5
1
Parent(s):
09d7140
Add padding="longest"
Browse files- main.py +3 -0
- pipeline.py +0 -1
main.py
CHANGED
@@ -164,6 +164,9 @@ def GetChatModel(compute_mode, ckpt_dir=None):
|
|
164 |
return_full_text=False,
|
165 |
# It seems that max_new_tokens has to be specified here, not in .invoke()
|
166 |
max_new_tokens=2000,
|
|
|
|
|
|
|
167 |
)
|
168 |
# We need the task so HuggingFacePipeline can deal with our class
|
169 |
pipe.task = "text-generation"
|
|
|
164 |
return_full_text=False,
|
165 |
# It seems that max_new_tokens has to be specified here, not in .invoke()
|
166 |
max_new_tokens=2000,
|
167 |
+
# Use padding for FlashAttention alignment
|
168 |
+
# https://github.com/google-deepmind/gemma/issues/169
|
169 |
+
padding="longest",
|
170 |
)
|
171 |
# We need the task so HuggingFacePipeline can deal with our class
|
172 |
pipe.task = "text-generation"
|
pipeline.py
CHANGED
@@ -22,7 +22,6 @@ class MyTextGenerationPipeline(TextGenerationPipeline):
|
|
22 |
continue_final_message=None,
|
23 |
**generate_kwargs,
|
24 |
):
|
25 |
-
print(f"PADDING: {padding}")
|
26 |
# Only set non-None tokenizer kwargs, so as to rely on the tokenizer's defaults
|
27 |
tokenizer_kwargs = {
|
28 |
"add_special_tokens": add_special_tokens,
|
|
|
22 |
continue_final_message=None,
|
23 |
**generate_kwargs,
|
24 |
):
|
|
|
25 |
# Only set non-None tokenizer kwargs, so as to rely on the tokenizer's defaults
|
26 |
tokenizer_kwargs = {
|
27 |
"add_special_tokens": add_special_tokens,
|