Update modeling_llama.py
updating based on transformers==4.49
- modeling_llama.py +3 -6
modeling_llama.py CHANGED
@@ -985,7 +985,8 @@ class LlamaModel(LlamaPreTrainedModel):
         if past_key_values is not None and not isinstance(
             past_key_values, StaticCache
         ):
-            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
+            if not isinstance(past_key_values, DynamicCache):
+                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             past_seen_tokens = past_key_values.get_seq_length()
 
         if cache_position is None:
@@ -1056,11 +1057,7 @@ class LlamaModel(LlamaPreTrainedModel):
 
         next_cache = None
         if use_cache:
-            next_cache = (
-                next_decoder_cache.to_legacy_cache()
-                if isinstance(next_decoder_cache, Cache)
-                else next_decoder_cache
-            )
+            next_cache = next_decoder_cache
         if not return_dict:
             return tuple(
                 v
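In short, the commit aligns the cache handling with the Cache-object API used by recent transformers releases (the commit message points at 4.49): on the input side, a past_key_values that is neither a StaticCache nor already a DynamicCache is wrapped with DynamicCache.from_legacy_cache() before get_seq_length() is queried; on the output side, the to_legacy_cache() conversion is dropped and next_decoder_cache is returned as next_cache directly. Below is a minimal sketch of the input-side normalization, written against the public transformers.cache_utils API; the helper name is made up for illustration and is not taken from this file.

# Minimal sketch (not part of the commit): mirrors the input-side cache
# handling added above, using the public transformers cache utilities.
from transformers.cache_utils import DynamicCache, StaticCache

def normalize_past_key_values(past_key_values):
    # StaticCache (and None) are left untouched; anything else that is not
    # already a DynamicCache (e.g. a legacy tuple of (key, value) pairs)
    # is wrapped so that get_seq_length() can be called on it.
    if past_key_values is not None and not isinstance(past_key_values, StaticCache):
        if not isinstance(past_key_values, DynamicCache):
            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
    return past_key_values

# Example: an empty legacy-format cache round-trips into a DynamicCache.
legacy = DynamicCache().to_legacy_cache()
cache = normalize_past_key_values(legacy)
print(type(cache).__name__, cache.get_seq_length())  # DynamicCache 0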