diff --git a/notebooks/hugging-face-hub/hugging-face-hub.ipynb b/notebooks/hugging-face-hub/hugging-face-hub.ipynb index 14604dfdf72..3a8bc2d1f9e 100644 --- a/notebooks/hugging-face-hub/hugging-face-hub.ipynb +++ b/notebooks/hugging-face-hub/hugging-face-hub.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -36,7 +35,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -48,7 +46,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -58,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -68,7 +65,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -92,7 +88,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -114,7 +109,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']\n", + "/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n", + "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] @@ -131,7 +128,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -176,7 +172,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -203,7 +198,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -212,7 +206,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -227,12 +220,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "076e75b32a964983a4a6df36c1c3d1e0", + "model_id": "d844663d421c4ea9a448d5d44be7f961", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO')" + "Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO')" ] }, "execution_count": 6, @@ -256,7 +249,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -290,7 +282,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -305,7 +296,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -318,7 +308,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -345,8 +334,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", + "\u001b[33mDEPRECATION: torchsde 0.2.5 has a non-standard dependency specifier numpy>=1.19.*; python_version >= \"3.7\". pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of torchsde or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.2\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] @@ -357,7 +347,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -375,13 +364,6 @@ "execution_count": 9, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -390,11 +372,31 @@ "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "2024-07-17 09:40:17.150496: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-07-17 09:40:17.152256: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-07-17 09:40:17.187913: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-07-17 09:40:17.188455: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-07-17 09:40:17.937510: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:\n", + " PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.3.0+cpu)\n", + " Python 3.8.18 (you have 3.8.10)\n", + " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", + " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", + " Set XFORMERS_MORE_DETAILS=1 for more details\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'\n" + "/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n", + " warn(\"The installed version of bitsandbytes was compiled without GPU support. \"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32\n" ] } ], @@ -403,7 +405,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -422,15 +423,27 @@ "name": "stderr", "output_type": "stream", "text": [ - "Framework not specified. Using pt to export to ONNX.\n", - "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']\n", + "Framework not specified. Using pt to export the model.\n", + "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-    "Using the export variant default. Available variants are:\n",
-    "    - default: The default ONNX variant.\n",
-    "Using framework PyTorch: 2.1.0+cpu\n",
+    "Using framework PyTorch: 2.3.0+cpu\n",
     "Overriding 1 configuration item(s)\n",
-    "\t- use_cache -> False\n",
+    "\t- use_cache -> False\n"
+   ]
+  },
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n"
+   ]
+  },
+  {
+   "name": "stderr",
+   "output_type": "stream",
+   "text": [
+    "[ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s.\n",
     "Compiling the model to AUTO ...\n"
    ]
   }
@@ -443,7 +456,6 @@
   ]
  },
  {
-   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
@@ -461,7 +473,7 @@
    "\n",
    "You can find a mapping between tasks and model classes in Optimum TaskManager [documentation](https://huggingface.co/docs/optimum/exporters/task_manager).\n",
    "\n",
-    "Additionally, you can specify weights compression `--fp16` for the compression model to FP16 and `--int8` for the compression model to INT8. Please note, that for INT8, it is necessary to install nncf.\n",
+    "Additionally, you can specify weights compression using the `--weight-format` argument with one of the following options: `fp32`, `fp16`, `int8` and `int4`. For `int8` and `int4`, NNCF will be used for weight compression.\n",
    "\n",
    "Full list of supported arguments available via `--help`"
   ]
  },
@@ -485,11 +497,21 @@
    "name": "stdout",
    "output_type": "stream",
    "text": [
+    "2024-07-17 09:40:40.173915: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
     "usage: optimum-cli export openvino [-h] -m MODEL [--task TASK]\n",
-    "                                   [--cache_dir CACHE_DIR]\n",
     "                                   [--framework {pt,tf}] [--trust-remote-code]\n",
-    "                                   [--pad-token-id PAD_TOKEN_ID] [--fp16]\n",
-    "                                   [--int8]\n",
+    "                                   [--weight-format {fp32,fp16,int8,int4,int4_sym_g128,int4_asym_g128,int4_sym_g64,int4_asym_g64}]\n",
+    "                                   [--library {transformers,diffusers,timm,sentence_transformers}]\n",
+    "                                   [--cache_dir CACHE_DIR]\n",
+    "                                   [--pad-token-id PAD_TOKEN_ID]\n",
+    "                                   [--ratio RATIO] [--sym]\n",
+    "                                   [--group-size GROUP_SIZE]\n",
+    "                                   [--dataset DATASET] [--all-layers] [--awq]\n",
+    "                                   [--scale-estimation]\n",
+    "                                   [--sensitivity-metric SENSITIVITY_METRIC]\n",
+    "                                   [--num-samples NUM_SAMPLES]\n",
+    "                                   [--disable-stateful]\n",
+    "                                   [--disable-convert-tokenizer]\n",
     "                                   output\n",
     "\n",
     "optional arguments:\n",
@@ -506,21 +528,20 @@
    "  --task TASK           The task to export the model for. 
If not specified,\n", " the task will be auto-inferred based on the model.\n", " Available tasks depend on the model, but are among:\n", - " ['semantic-segmentation', 'zero-shot-image-\n", - " classification', 'text-generation', 'stable-diffusion-\n", - " xl', 'image-classification', 'image-segmentation',\n", - " 'conversational', 'audio-classification', 'text2text-\n", - " generation', 'automatic-speech-recognition', 'text-to-\n", - " audio', 'audio-frame-classification', 'question-\n", - " answering', 'stable-diffusion', 'mask-generation',\n", - " 'zero-shot-object-detection', 'token-classification',\n", - " 'image-to-text', 'feature-extraction', 'audio-\n", - " xvector', 'text-classification', 'fill-mask', 'object-\n", - " detection', 'multiple-choice', 'masked-im']. For\n", - " decoder models, use `xxx-with-past` to export the\n", - " model using past key values in the decoder.\n", - " --cache_dir CACHE_DIR\n", - " Path indicating where to store cache.\n", + " ['image-to-text', 'audio-frame-classification', 'text-\n", + " generation', 'fill-mask', 'image-segmentation',\n", + " 'audio-xvector', 'semantic-segmentation', 'depth-\n", + " estimation', 'token-classification', 'zero-shot-image-\n", + " classification', 'zero-shot-object-detection',\n", + " 'text2text-generation', 'sentence-similarity',\n", + " 'feature-extraction', 'conversational', 'image-\n", + " classification', 'text-to-audio', 'stable-diffusion',\n", + " 'image-to-image', 'text-classification', 'automatic-\n", + " speech-recognition', 'multiple-choice', 'masked-im',\n", + " 'mask-generation', 'question-answering', 'object-\n", + " detection', 'audio-classification', 'stable-diffusion-\n", + " xl']. For decoder models, use `xxx-with-past` to\n", + " export the model using past key values in the decoder.\n", " --framework {pt,tf} The framework to use for the export. If not provided,\n", " will attempt to use the local checkpoint's original\n", " framework or what is available in the environment.\n", @@ -529,12 +550,77 @@ " for repositories you trust and in which you have read\n", " the code, as it will execute on your local machine\n", " arbitrary code present in the model repository.\n", + " --weight-format {fp32,fp16,int8,int4,int4_sym_g128,int4_asym_g128,int4_sym_g64,int4_asym_g64}\n", + " he weight format of the exported model.\n", + " --library {transformers,diffusers,timm,sentence_transformers}\n", + " The library used to load the model before export. If\n", + " not provided, will attempt to infer the local\n", + " checkpoint's library\n", + " --cache_dir CACHE_DIR\n", + " The path to a directory in which the downloaded model\n", + " should be cached if the standard cache should not be\n", + " used.\n", " --pad-token-id PAD_TOKEN_ID\n", " This is needed by some models, for some tasks. If not\n", " provided, will attempt to use the tokenizer to guess\n", " it.\n", - " --fp16 Compress weights to fp16\n", - " --int8 Compress weights to int8\n" + " --ratio RATIO A parameter used when applying 4-bit quantization to\n", + " control the ratio between 4-bit and 8-bit\n", + " quantization. If set to 0.8, 80% of the layers will be\n", + " quantized to int4 while 20% will be quantized to int8.\n", + " This helps to achieve better accuracy at the sacrifice\n", + " of the model size and inference latency. Default value\n", + " is 1.0.\n", + " --sym Whether to apply symmetric quantization\n", + " --group-size GROUP_SIZE\n", + " The group size to use for quantization. 
Recommended\n", + " value is 128 and -1 uses per-column quantization.\n", + " --dataset DATASET The dataset used for data-aware compression or\n", + " quantization with NNCF. You can use the one from the\n", + " list ['wikitext2','c4','c4-new'] for language models\n", + " or ['conceptual_captions','laion/220k-GPT4Vision-\n", + " captions-from-LIVIS','laion/filtered-wit'] for\n", + " diffusion models.\n", + " --all-layers Whether embeddings and last MatMul layers should be\n", + " compressed to INT4. If not provided an weight\n", + " compression is applied, they are compressed to INT8.\n", + " --awq Whether to apply AWQ algorithm. AWQ improves\n", + " generation quality of INT4-compressed LLMs, but\n", + " requires additional time for tuning weights on a\n", + " calibration dataset. To run AWQ, please also provide a\n", + " dataset argument. Note: it's possible that there will\n", + " be no matching patterns in the model to apply AWQ, in\n", + " such case it will be skipped.\n", + " --scale-estimation Indicates whether to apply a scale estimation\n", + " algorithm that minimizes the L2 error between the\n", + " original and compressed layers. Providing a dataset is\n", + " required to run scale estimation. Please note, that\n", + " applying scale estimation takes additional memory and\n", + " time.\n", + " --sensitivity-metric SENSITIVITY_METRIC\n", + " The sensitivity metric for assigning quantization\n", + " precision to layers. Can be one of the following:\n", + " ['weight_quantization_error',\n", + " 'hessian_input_activation',\n", + " 'mean_activation_variance', 'max_activation_variance',\n", + " 'mean_activation_magnitude'].\n", + " --num-samples NUM_SAMPLES\n", + " The maximum number of samples to take from the dataset\n", + " for quantization.\n", + " --disable-stateful Disable stateful converted models, stateless models\n", + " will be generated instead. Stateful models are\n", + " produced by default when this key is not used. In\n", + " stateful models all kv-cache inputs and outputs are\n", + " hidden in the model and are not exposed as model\n", + " inputs and outputs. If --disable-stateful option is\n", + " used, it may result in sub-optimal inference\n", + " performance. Use it when you intentionally want to use\n", + " a stateless model, for example, to be compatible with\n", + " existing OpenVINO native inference code that expects\n", + " kv-cache inputs and outputs in the model.\n", + " --disable-convert-tokenizer\n", + " Do not add converted tokenizer and detokenizer\n", + " OpenVINO models.\n" ] } ], @@ -543,7 +629,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -569,24 +654,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "Framework not specified. Using pt to export to ONNX.\n", - "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']\n", + "2024-07-17 09:40:45.950526: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. 
xFormers was built for:\n", + " PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.3.0+cpu)\n", + " Python 3.8.18 (you have 3.8.10)\n", + " Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)\n", + " Memory-efficient attention, SwiGLU, sparse and more won't be available.\n", + " Set XFORMERS_MORE_DETAILS=1 for more details\n", + "/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n", + " warn(\"The installed version of bitsandbytes was compiled without GPU support. \"\n", + "/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32\n", + "Framework not specified. Using pt to export the model.\n", + "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", - "Using the export variant default. Available variants are:\n", - " - default: The default ONNX variant.\n", - "Using framework PyTorch: 2.1.0+cpu\n", + "Using framework PyTorch: 2.3.0+cpu\n", "Overriding 1 configuration item(s)\n", "\t- use_cache -> False\n" ] } ], "source": [ - "!optimum-cli export openvino --model $MODEL --task text-classification --fp16 models/optimum_model/fp16" + "!optimum-cli export openvino --model $MODEL --task text-classification --weight-format fp16 models/optimum_model/fp16" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -602,8 +694,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Compiling the model to AUTO ...\n", - "Setting OpenVINO CACHE_DIR to models/optimum_model/fp16/model_cache\n" + "Compiling the model to AUTO ...\n" ] } ], @@ -612,7 +703,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -620,7 +710,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -654,7 +743,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -703,7 +791,39 @@ }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "state": {}, + "state": { + "087c01ae5c9c44f3a1a0730b2d856f97": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "0dc8780d967e49ad89fc4d76f64a1b06": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "d844663d421c4ea9a448d5d44be7f961": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "CPU", + "GPU.0", + "GPU.1", + "AUTO" + ], + "description": 
"Device:", + "index": 3, + "layout": "IPY_MODEL_0dc8780d967e49ad89fc4d76f64a1b06", + "style": "IPY_MODEL_087c01ae5c9c44f3a1a0730b2d856f97" + } + } + }, "version_major": 2, "version_minor": 0 }