Update modeling_llama.py
Updating cache methods due to a change in transformers
- modeling_llama.py +2 -2
--- a/modeling_llama.py
+++ b/modeling_llama.py
@@ -1218,8 +1218,8 @@ class LlamaForCausalLM(LlamaPreTrainedModel):
         if isinstance(past_key_values, Cache):
             past_length = cache_position[0] if cache_position is not None else past_key_values.get_seq_length()
             max_cache_length = (
-                torch.tensor(past_key_values.get_max_length(), device=input_ids.device)
-                if past_key_values.get_max_length() is not None
+                torch.tensor(past_key_values.get_max_cache_shape(), device=input_ids.device)
+                if past_key_values.get_max_cache_shape() is not None
                 else None
             )
             cache_length = past_length if max_cache_length is None else torch.min(max_cache_length, past_length)
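For reference, newer transformers releases expose Cache.get_max_cache_shape() in place of the older max-length lookup, which is what this commit tracks. Below is a minimal sketch of the new call in isolation (the exact transformers version that introduced it is an assumption, and the DynamicCache example is illustrative rather than part of this repository):

import torch
from transformers import DynamicCache

past_key_values = DynamicCache()

# Assumption: get_max_cache_shape() returns None for growable caches such as
# DynamicCache and an integer maximum length for fixed-size caches (e.g. StaticCache).
max_cache_shape = past_key_values.get_max_cache_shape()
max_cache_length = torch.tensor(max_cache_shape) if max_cache_shape is not None else None
print(max_cache_length)  # None for a DynamicCache, which has no fixed maximum length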