diff --git a/docs/parameters.md b/docs/parameters.md
index 95b4bd5885..9bf2f0acaa 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -125,7 +125,7 @@ Task specific parameters for different tasks (text generation/image generation/e
 | `--pipeline_type` | `string` | Type of the pipeline to be used. Choices: `LM`, `LM_CB`, `VLM`, `VLM_CB`, `AUTO`. Default: `AUTO`. |
 | `--enable_prefix_caching` | `bool` | Enables algorithm to cache the prompt tokens. Default: true. |
 | `--max_num_batched_tokens` | `integer` | The maximum number of tokens that can be batched together. |
-| `--cache_size` | `integer` | Cache size in GB. Default: 10. |
+| `--cache_size` | `integer` | Cache size in GB. Default: 1. |
 | `--draft_source_model` | `string` | HF model name or path to the local folder with PyTorch or OpenVINO draft model. |
 | `--dynamic_split_fuse` | `bool` | Enables dynamic split fuse algorithm. Default: true. |
 | `--max_prompt_len` | `integer` | Sets NPU specific property for maximum number of tokens in the prompt. |
diff --git a/src/graph_export/graph_cli_parser.cpp b/src/graph_export/graph_cli_parser.cpp
index 4e61bf689c..3cd79ddafd 100644
--- a/src/graph_export/graph_cli_parser.cpp
+++ b/src/graph_export/graph_cli_parser.cpp
@@ -57,8 +57,8 @@ void GraphCLIParser::createOptions() {
            cxxopts::value<uint32_t>(),
            "MAX_NUM_BATCHED_TOKENS")
        ("cache_size",
-           "cache size in GB, default is 10.",
-           cxxopts::value<uint32_t>()->default_value("10"),
+           "cache size in GB, default is 1.",
+           cxxopts::value<uint32_t>()->default_value("1"),
            "CACHE_SIZE")
        ("draft_source_model",
            "HF model name or path to the local folder with PyTorch or OpenVINO draft model.",