Commit

new link
gante committed Sep 18, 2024
1 parent f250b38 commit e556965
Showing 29 changed files with 58 additions and 58 deletions.
4 changes: 2 additions & 2 deletions docs/source/en/kv_cache.md
@@ -403,13 +403,13 @@ Sometimes you would want to first fill-in cache object with key/values for certa
 ```


-## Converting to/from the legacy cache format
+## Legacy cache format

 Prior to the introduction of the `Cache` object, the cache of LLMs used to be a tuple of tuples of tensors. The legacy
 format has a dynamic size, growing as we generate text -- very similar to `DynamicCache`. If your project depends on
 this legacy format, you can seamlessly convert it to a `DynamicCache` and back.

-```py
+```python
 >>> import torch
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache
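The renamed section documents a round trip between the two cache formats. Below is a minimal sketch of that conversion, assuming a small checkpoint such as `gpt2` purely for illustration:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("Hello, my name is", return_tensors="pt")

# A forward pass with use_cache=True returns `past_key_values`; on models that
# still emit the legacy format, this is a tuple of per-layer (key, value) tuples.
outputs = model(**inputs, use_cache=True)

# Convert the legacy format to a `DynamicCache`, and back again.
cache = DynamicCache.from_legacy_cache(outputs.past_key_values)
legacy_cache = cache.to_legacy_cache()
```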
4 changes: 2 additions & 2 deletions src/transformers/models/bloom/modeling_bloom.py
@@ -696,8 +696,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         batch_size, seq_length, _ = inputs_embeds.shape
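The reworded warning asks callers to convert their cache rather than keep passing legacy tuples. Below is a hedged sketch of that pattern; `bigscience/bloom-560m` is an illustrative checkpoint, and the same approach applies to every model touched by this commit:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache

model_id = "bigscience/bloom-560m"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("The legacy cache format is", return_tensors="pt")
outputs = model(**inputs, use_cache=True)

# Suppose we only have the deprecated tuple-of-tuples cache, e.g. produced by
# an older pipeline. Passing it straight to forward() hits the warning above;
# converting it to a `Cache` object first does not.
legacy_past = DynamicCache.from_legacy_cache(outputs.past_key_values).to_legacy_cache()
past = DynamicCache.from_legacy_cache(legacy_past)

next_inputs = tokenizer(" deprecated", return_tensors="pt", add_special_tokens=False)
outputs = model(next_inputs.input_ids, past_key_values=past, use_cache=True)
```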
4 changes: 2 additions & 2 deletions src/transformers/models/codegen/modeling_codegen.py
@@ -536,8 +536,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         seq_length = inputs_embeds.shape[1]
4 changes: 2 additions & 2 deletions src/transformers/models/cohere/modeling_cohere.py
@@ -920,8 +920,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/dbrx/modeling_dbrx.py
@@ -1069,8 +1069,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/falcon/modeling_falcon.py
@@ -1041,8 +1041,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         # Compute alibi tensor: check build_alibi_tensor documentation
4 changes: 2 additions & 2 deletions src/transformers/models/gemma/diff_gemma.py
@@ -486,8 +486,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/gemma/modeling_gemma.py
@@ -838,8 +838,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/git/modeling_git.py
@@ -427,8 +427,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         all_hidden_states = () if output_hidden_states else None
4 changes: 2 additions & 2 deletions src/transformers/models/gpt_neo/modeling_gpt_neo.py
@@ -751,8 +751,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         seq_length = inputs_embeds.shape[1]
4 changes: 2 additions & 2 deletions src/transformers/models/gpt_neox/modeling_gpt_neox.py
@@ -953,8 +953,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         seq_length = inputs_embeds.shape[1]
4 changes: 2 additions & 2 deletions src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py
@@ -673,8 +673,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         seq_length = inputs_embeds.shape[1]
4 changes: 2 additions & 2 deletions src/transformers/models/gptj/modeling_gptj.py
@@ -823,8 +823,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         seq_length = inputs_embeds.shape[1]
4 changes: 2 additions & 2 deletions src/transformers/models/granite/modeling_granite.py
@@ -844,8 +844,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/idefics/modeling_idefics.py
@@ -1249,8 +1249,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         batch_size, seq_length, _ = inputs_embeds.shape
4 changes: 2 additions & 2 deletions src/transformers/models/idefics2/modeling_idefics2.py
@@ -1355,8 +1355,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )
         past_seen_tokens = past_key_values.get_seq_length()

4 changes: 2 additions & 2 deletions src/transformers/models/jetmoe/modeling_jetmoe.py
@@ -1043,8 +1043,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/llama/modeling_llama.py
@@ -954,8 +954,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/mistral/modeling_mistral.py
@@ -772,8 +772,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/mixtral/modeling_mixtral.py
@@ -1028,8 +1028,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if inputs_embeds is None:
4 changes: 2 additions & 2 deletions src/transformers/models/olmo/modeling_olmo.py
@@ -876,8 +876,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/olmoe/modeling_olmoe.py
@@ -1017,8 +1017,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if cache_position is None:
4 changes: 2 additions & 2 deletions src/transformers/models/persimmon/modeling_persimmon.py
@@ -695,8 +695,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if inputs_embeds is None:
4 changes: 2 additions & 2 deletions src/transformers/models/phi/modeling_phi.py
@@ -986,8 +986,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if inputs_embeds is None:
4 changes: 2 additions & 2 deletions src/transformers/models/phi3/modeling_phi3.py
@@ -1013,8 +1013,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if inputs_embeds is None:
4 changes: 2 additions & 2 deletions src/transformers/models/qwen2/modeling_qwen2.py
@@ -925,8 +925,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
             )

         if inputs_embeds is None:
4 changes: 2 additions & 2 deletions src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
@@ -1089,8 +1089,8 @@ def forward(
             past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             logger.warning_once(
                 "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and "
-                "will be removed in v4.47. Please use an appropriate `Cache` class "
-                "(https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)"
+                "will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class "
+                "(https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)"
            )

         if inputs_embeds is None:
