Update modeling_llama.py
updating based on transformers==4.49
- modeling_llama.py +3 -6
modeling_llama.py CHANGED
@@ -985,7 +985,8 @@ class LlamaModel(LlamaPreTrainedModel):
         if past_key_values is not None and not isinstance(
             past_key_values, StaticCache
         ):
-            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
+            if not isinstance(past_key_values, DynamicCache):
+                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             past_seen_tokens = past_key_values.get_seq_length()
 
         if cache_position is None:
@@ -1056,11 +1057,7 @@ class LlamaModel(LlamaPreTrainedModel):
 
         next_cache = None
         if use_cache:
-            next_cache = (
-                next_decoder_cache.to_legacy_cache()
-                if isinstance(next_decoder_cache, Cache)
-                else next_decoder_cache
-            )
+            next_cache = next_decoder_cache
         if not return_dict:
             return tuple(
                 v
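In short, the commit aligns the cache handling with the Cache-object API used by recent transformers releases (the commit message points at 4.49): on the input side, a past_key_values that is neither a StaticCache nor already a DynamicCache is wrapped with DynamicCache.from_legacy_cache() before get_seq_length() is queried; on the output side, the to_legacy_cache() conversion is dropped and next_decoder_cache is returned as next_cache directly. Below is a minimal sketch of the input-side normalization, written against the public transformers.cache_utils API; the helper name is made up for illustration and is not taken from this file.

# Minimal sketch (not part of the commit): mirrors the input-side cache
# handling added above, using the public transformers cache utilities.
from transformers.cache_utils import DynamicCache, StaticCache

def normalize_past_key_values(past_key_values):
    # StaticCache (and None) are left untouched; anything else that is not
    # already a DynamicCache (e.g. a legacy tuple of (key, value) pairs)
    # is wrapped so that get_seq_length() can be called on it.
    if past_key_values is not None and not isinstance(past_key_values, StaticCache):
        if not isinstance(past_key_values, DynamicCache):
            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
    return past_key_values

# Example: an empty legacy-format cache round-trips into a DynamicCache.
legacy = DynamicCache().to_legacy_cache()
cache = normalize_past_key_values(legacy)
print(type(cache).__name__, cache.get_seq_length())  # DynamicCache 0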