diff --git a/examples/.config/pytorch_optimize.json b/examples/.config/pytorch_optimize.json
index 64d229aed1f..e54bf868d0c 100644
--- a/examples/.config/pytorch_optimize.json
+++ b/examples/.config/pytorch_optimize.json
@@ -20,6 +20,27 @@
       }
     }
   },
+  "gpt_neo_clm_qat": {
+    "working_dir": "huggingface/pytorch/language-modeling/quantization",
+    "tune":{
+      "cmd": "bash run_tuning.sh",
+      "params": {
+        "topology": "gpt_neo_clm_qat",
+        "output_model": "saved_results"
+      }
+    },
+    "benchmark": {
+      "cmd": "bash run_benchmark.sh",
+      "params": {
+        "topology": "gpt_neo_clm_qat",
+        "mode": "accuracy",
+        "batch_size": "8",
+        "iters": "100",
+        "int8": "false",
+        "config": "saved_results"
+      }
+    }
+  },
   "bloom_text_static": {
     "working_dir": "huggingface/pytorch/text-generation/quantization",
     "tune":{
@@ -146,6 +167,27 @@
       }
     }
   },
+  "bert_mlm_qat": {
+    "working_dir": "huggingface/pytorch/language-modeling/quantization",
+    "tune": {
+      "cmd": "bash run_tuning.sh",
+      "params": {
+        "topology": "bert_mlm_qat",
+        "output_model": "saved_results"
+      }
+    },
+    "benchmark": {
+      "cmd": "bash run_benchmark.sh",
+      "params": {
+        "topology": "bert_mlm_qat",
+        "mode": "accuracy",
+        "batch_size": "8",
+        "iters": "100",
+        "int8": "false",
+        "config": "saved_results"
+      }
+    }
+  },
   "bert_mlm_dynamic": {
     "working_dir": "huggingface/pytorch/language-modeling/quantization/",
     "tune": {
@@ -188,6 +230,27 @@
       }
     }
   },
+  "xlnet_plm_qat": {
+    "working_dir": "huggingface/pytorch/language-modeling/quantization/",
+    "tune": {
+      "cmd": "bash run_tuning.sh",
+      "params": {
+        "topology": "xlnet_plm_qat",
+        "output_model": "saved_results"
+      }
+    },
+    "benchmark": {
+      "cmd": "bash run_benchmark.sh",
+      "params": {
+        "topology": "xlnet_plm_qat",
+        "mode": "accuracy",
+        "batch_size": "8",
+        "iters": "100",
+        "int8": "false",
+        "config": "saved_results"
+      }
+    }
+  },
   "xlnet_plm_dynamic": {
     "working_dir": "huggingface/pytorch/language-modeling/quantization/",
     "tune": {
@@ -294,6 +357,27 @@
       }
     }
   },
+  "bert_base_swag_qat": {
+    "working_dir": "huggingface/pytorch/multiple-choice/quantization/",
+    "tune": {
+      "cmd": "bash run_tuning.sh",
+      "params": {
+        "topology": "bert_base_swag_qat",
+        "output_model": "saved_results"
+      }
+    },
+    "benchmark": {
+      "cmd": "bash run_benchmark.sh",
+      "params": {
+        "topology": "bert_base_swag_qat",
+        "mode": "accuracy",
+        "batch_size": "64",
+        "iters": "100",
+        "int8": "false",
+        "config": "saved_results"
+      }
+    }
+  },
   "bert_base_swag_dynamic": {
     "working_dir": "huggingface/pytorch/multiple-choice/quantization/",
     "tune": {
@@ -336,6 +420,27 @@
       }
     }
   },
+  "distilbert_base_squad_qat": {
+    "working_dir": "huggingface/pytorch/question-answering/quantization/",
+    "tune": {
+      "cmd": "bash run_tuning.sh",
+      "params": {
+        "topology": "distilbert_base_squad_qat",
+        "output_model": "saved_results"
+      }
+    },
+    "benchmark": {
+      "cmd": "bash run_benchmark.sh",
+      "params": {
+        "topology": "distilbert_base_squad_qat",
+        "mode": "accuracy",
+        "batch_size": "64",
+        "iters": "100",
+        "int8": "false",
+        "config": "saved_results"
+      }
+    }
+  },
   "distilbert_base_squad_dynamic": {
     "working_dir": "huggingface/pytorch/question-answering/quantization/",
     "tune": {
@@ -819,6 +924,27 @@
       }
     }
   },
+  "distilbert_base_ner_qat": {
+    "working_dir": "huggingface/pytorch/token-classification/quantization/",
+    "tune": {
+      "cmd": "bash run_tuning.sh",
+      "params": {
+        "topology": "distilbert_base_ner_qat",
+        "output_model": "saved_results"
+      }
+    },
+    "benchmark": {
+      "cmd": "bash run_benchmark.sh",
+      "params": {
+        "topology": "distilbert_base_ner_qat",
+        "mode": "accuracy",
+        "batch_size": "64",
+        "iters": "100",
+        "int8": "false",
+        "config": "saved_results"
+      }
+    }
+  },
   "distilbert_base_ner_dynamic": {
     "working_dir": "huggingface/pytorch/token-classification/quantization/",
     "tune": {
diff --git a/examples/huggingface/pytorch/image-classification/quantization/README.md b/examples/huggingface/pytorch/image-classification/quantization/README.md
index 3f810abf202..de2564ecdd8 100644
--- a/examples/huggingface/pytorch/image-classification/quantization/README.md
+++ b/examples/huggingface/pytorch/image-classification/quantization/README.md
@@ -25,4 +25,4 @@ run run_benchmark.sh
 
 |Dataset|Pretrained model|PostTrainingDynamic | PostTrainingStatic | QuantizationAwareTraining
 |---|------------------------------------|---|---|---
-|imagenet-1k|google/vit-base-patch16-224| ✅| ✅| N/✅|
+|imagenet-1k|google/vit-base-patch16-224| ✅| ✅| N/A|
diff --git a/examples/huggingface/pytorch/image-classification/quantization/run_image_classification.py b/examples/huggingface/pytorch/image-classification/quantization/run_image_classification.py
index b5ee66fd863..e7d3a8765f9 100644
--- a/examples/huggingface/pytorch/image-classification/quantization/run_image_classification.py
+++ b/examples/huggingface/pytorch/image-classification/quantization/run_image_classification.py
@@ -408,6 +408,9 @@ def val_transforms(example_batch):
         # Set the validation transforms
         dataset["validation"].set_transform(val_transforms)
 
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name
+
     # Initalize our trainer
     trainer = NLPTrainer(
         model=model,
@@ -418,8 +421,6 @@ def val_transforms(example_batch):
         tokenizer=feature_extractor,
         data_collator=collate_fn,
     )
-    
-    metric_name = optim_args.metric_name
 
     if optim_args.tune:
 
diff --git a/examples/huggingface/pytorch/language-modeling/quantization/requirements.txt b/examples/huggingface/pytorch/language-modeling/quantization/requirements.txt
index 237bf089730..c56f8e8d328 100644
--- a/examples/huggingface/pytorch/language-modeling/quantization/requirements.txt
+++ b/examples/huggingface/pytorch/language-modeling/quantization/requirements.txt
@@ -1,9 +1,9 @@
 accelerate
-datasets >= 1.8.0
 protobuf
 sentencepiece != 0.1.92
-torch >= 1.10.0
-transformers>=4.12.0
+datasets >= 1.1.3
+torch >= 1.10
+transformers
 wandb
 neural_compressor
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@83dbfbf6070324f3e5872f63e49d49ff7ef4c9b3
diff --git a/examples/huggingface/pytorch/language-modeling/quantization/run_benchmark.sh b/examples/huggingface/pytorch/language-modeling/quantization/run_benchmark.sh
index 841f0356c20..a30a766e483 100644
--- a/examples/huggingface/pytorch/language-modeling/quantization/run_benchmark.sh
+++ b/examples/huggingface/pytorch/language-modeling/quantization/run_benchmark.sh
@@ -135,6 +135,21 @@ function run_benchmark {
         script="run_clm.py"
         DATASET_NAME="lambada"
         model_name_or_path="bigscience/bloom-560m"
+    elif [ "${topology}" = "gpt_neo_clm_qat" ]; then
+        script="run_clm.py"
+        DATASET_NAME="wikitext"
+        DATASET_CONFIG_NAME="wikitext-2-raw-v1"
+        model_name_or_path="EleutherAI/gpt-neo-125M"
+    elif [ "${topology}" = "bert_mlm_qat" ]; then
+        script="run_mlm.py"
+        DATASET_NAME="wikitext"
+        DATASET_CONFIG_NAME="wikitext-2-raw-v1"
+        model_name_or_path="bert-base-uncased"
+    elif [ "${topology}" = "xlnet_plm_qat" ]; then
+        script="run_plm.py"
+        DATASET_NAME="wikitext"
+        DATASET_CONFIG_NAME="wikitext-2-raw-v1"
+        model_name_or_path="xlnet-base-cased"
     fi
     
     if [[ ${int8} == "true" ]]; then
diff --git a/examples/huggingface/pytorch/language-modeling/quantization/run_clm.py b/examples/huggingface/pytorch/language-modeling/quantization/run_clm.py
index bfaf7486a1b..9add032aeb8 100644
--- a/examples/huggingface/pytorch/language-modeling/quantization/run_clm.py
+++ b/examples/huggingface/pytorch/language-modeling/quantization/run_clm.py
@@ -541,6 +541,9 @@ def compute_metrics(eval_preds):
             preds = preds[:, :-1].reshape(-1)
             return metric.compute(predictions=preds, references=labels)
 
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name
+
     # Initialize our Trainer
     trainer = NLPTrainer(
         model=model,
@@ -555,7 +558,6 @@ def compute_metrics(eval_preds):
         if training_args.do_eval and not is_torch_tpu_available()
         else None,
     )
-    metric_name = optim_args.metric_name
 
     if optim_args.tune:
 
diff --git a/examples/huggingface/pytorch/language-modeling/quantization/run_mlm.py b/examples/huggingface/pytorch/language-modeling/quantization/run_mlm.py
index d216603d972..362a5a556fa 100644
--- a/examples/huggingface/pytorch/language-modeling/quantization/run_mlm.py
+++ b/examples/huggingface/pytorch/language-modeling/quantization/run_mlm.py
@@ -561,6 +561,8 @@ def compute_metrics(eval_preds):
         mlm_probability=data_args.mlm_probability,
         pad_to_multiple_of=8 if pad_to_multiple_of_8 else None,
     )
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name
 
     # Initialize our Trainer
     trainer = NLPTrainer(
@@ -575,7 +577,6 @@ def compute_metrics(eval_preds):
         if training_args.do_eval and not is_torch_tpu_available()
         else None,
     )
-    metric_name = optim_args.metric_name
 
     if optim_args.tune:
 
diff --git a/examples/huggingface/pytorch/language-modeling/quantization/run_plm.py b/examples/huggingface/pytorch/language-modeling/quantization/run_plm.py
index fff58b12b5b..ffd1223da5d 100644
--- a/examples/huggingface/pytorch/language-modeling/quantization/run_plm.py
+++ b/examples/huggingface/pytorch/language-modeling/quantization/run_plm.py
@@ -511,6 +511,8 @@ def group_texts(examples):
         plm_probability=data_args.plm_probability,
         max_span_length=data_args.max_span_length,
     )
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name
 
     # Initialize our Trainer
     trainer = NLPTrainer(
@@ -521,7 +523,6 @@ def group_texts(examples):
         tokenizer=tokenizer,
         data_collator=data_collator,
     )
-    metric_name = optim_args.metric_name
 
     if optim_args.tune:
 
diff --git a/examples/huggingface/pytorch/language-modeling/quantization/run_tuning.sh b/examples/huggingface/pytorch/language-modeling/quantization/run_tuning.sh
index 7c04ef3e54d..b29da1c7b6a 100644
--- a/examples/huggingface/pytorch/language-modeling/quantization/run_tuning.sh
+++ b/examples/huggingface/pytorch/language-modeling/quantization/run_tuning.sh
@@ -56,6 +56,21 @@ function run_tuning {
         DATASET_CONFIG_NAME="wikitext-2-raw-v1"
         model_name_or_path="EleutherAI/gpt-neo-125M"
         approach="PostTrainingDynamic"
+    elif [ "${topology}" = "gpt_neo_clm_qat" ]; then
+        script="run_clm.py"
+        DATASET_NAME="wikitext"
+        DATASET_CONFIG_NAME="wikitext-2-raw-v1"
+        model_name_or_path="EleutherAI/gpt-neo-125M"
+        approach="QuantizationAwareTraining"
+        extra_cmd=$extra_cmd" --learning_rate 1e-5 \
+                   --num_train_epochs 6 \
+                   --eval_steps 100 \
+                   --save_steps 100 \
+                   --greater_is_better True \
+                   --load_best_model_at_end True \
+                   --evaluation_strategy steps \
+                   --save_strategy steps \
+                   --save_total_limit 1"
     elif [ "${topology}" = "gptj_clm_static" ]; then
         script="run_clm.py"
         DATASET_NAME="wikitext"
@@ -79,6 +94,22 @@ function run_tuning {
         DATASET_CONFIG_NAME="wikitext-2-raw-v1"
         model_name_or_path="bert-base-uncased"
         approach="PostTrainingStatic"
+    elif [ "${topology}" = "bert_mlm_qat" ]; then
+        script="run_mlm.py"
+        DATASET_NAME="wikitext"
+        DATASET_CONFIG_NAME="wikitext-2-raw-v1"
+        model_name_or_path="bert-base-uncased"
+        approach="QuantizationAwareTraining"
+        extra_cmd=$extra_cmd" --learning_rate 1e-5 \
+                   --num_train_epochs 6 \
+                   --eval_steps 100 \
+                   --save_steps 100 \
+                   --greater_is_better True \
+                   --load_best_model_at_end True \
+                   --evaluation_strategy steps \
+                   --save_strategy steps \
+                   --metric_for_best_model accuracy \
+                   --save_total_limit 1"
     elif [ "${topology}" = "bert_mlm_dynamic" ]; then
         script="run_mlm.py"
         DATASET_NAME="wikitext"
@@ -91,6 +122,22 @@ function run_tuning {
         DATASET_CONFIG_NAME="wikitext-2-raw-v1"
         model_name_or_path="xlnet-base-cased"
         approach="PostTrainingStatic"
+    elif [ "${topology}" = "xlnet_plm_qat" ]; then
+        script="run_plm.py"
+        DATASET_NAME="wikitext"
+        DATASET_CONFIG_NAME="wikitext-2-raw-v1"
+        model_name_or_path="xlnet-base-cased"
+        approach="QuantizationAwareTraining"
+        extra_cmd=$extra_cmd" --learning_rate 1e-5 \
+                   --num_train_epochs 6 \
+                   --eval_steps 100 \
+                   --save_steps 100 \
+                   --greater_is_better True \
+                   --load_best_model_at_end True \
+                   --evaluation_strategy steps \
+                   --save_strategy steps \
+                   --metric_for_best_model accuracy \
+                   --save_total_limit 1"
     elif [ "${topology}" = "xlnet_plm_dynamic" ]; then
         script="run_plm.py"
         DATASET_NAME="wikitext"
diff --git a/examples/huggingface/pytorch/multiple-choice/quantization/requirements.txt b/examples/huggingface/pytorch/multiple-choice/quantization/requirements.txt
index 8e1098c9b81..eda4eaad24e 100644
--- a/examples/huggingface/pytorch/multiple-choice/quantization/requirements.txt
+++ b/examples/huggingface/pytorch/multiple-choice/quantization/requirements.txt
@@ -2,5 +2,5 @@ datasets >= 1.1.3
 sentencepiece != 0.1.92
 protobuf
 torch >= 1.10.0
-transformers >=4.12.0
+transformers
 wandb
diff --git a/examples/huggingface/pytorch/multiple-choice/quantization/run_benchmark.sh b/examples/huggingface/pytorch/multiple-choice/quantization/run_benchmark.sh
index de633f77018..a2a87ae091b 100644
--- a/examples/huggingface/pytorch/multiple-choice/quantization/run_benchmark.sh
+++ b/examples/huggingface/pytorch/multiple-choice/quantization/run_benchmark.sh
@@ -56,10 +56,10 @@ function run_benchmark {
 
     if [ "${topology}" = "bert_base_swag_static" ]; then
         model_name_or_path="ehdwns1516/bert-base-uncased_SWAG"
-        approach="PostTrainingStatic"
     elif [ "${topology}" = "bert_base_swag_dynamic" ]; then
         model_name_or_path="ehdwns1516/bert-base-uncased_SWAG"
-        approach="PostTrainingDynamic"
+    elif [ "${topology}" = "bert_base_swag_qat" ]; then
+        model_name_or_path="ehdwns1516/bert-base-uncased_SWAG"
     fi
 
     if [[ ${mode} == "accuracy" ]]; then
diff --git a/examples/huggingface/pytorch/multiple-choice/quantization/run_swag.py b/examples/huggingface/pytorch/multiple-choice/quantization/run_swag.py
index aad4487086b..d84fa92b4fd 100644
--- a/examples/huggingface/pytorch/multiple-choice/quantization/run_swag.py
+++ b/examples/huggingface/pytorch/multiple-choice/quantization/run_swag.py
@@ -448,6 +448,13 @@ def compute_metrics(eval_predictions):
         preds = np.argmax(predictions, axis=1)
         return {"accuracy": (preds == label_ids).astype(np.float32).mean().item()}
 
+    metric_name = (
+        optim_args.metric_name
+        if optim_args.metric_name is not None
+        else "eval_accuracy"
+    )
+    training_args.metric_for_best_model = metric_name
+
     # Initialize our Trainer
     trainer = NLPTrainer(
         model=model,
@@ -459,12 +466,6 @@ def compute_metrics(eval_predictions):
         compute_metrics=compute_metrics,
     )
 
-    metric_name = (
-        optim_args.metric_name
-        if optim_args.metric_name is not None
-        else "eval_accuracy"
-    )
-
     if optim_args.tune:
 
         if not training_args.do_eval:
diff --git a/examples/huggingface/pytorch/multiple-choice/quantization/run_tuning.sh b/examples/huggingface/pytorch/multiple-choice/quantization/run_tuning.sh
index 7f315891c5b..3a718e34f0e 100644
--- a/examples/huggingface/pytorch/multiple-choice/quantization/run_tuning.sh
+++ b/examples/huggingface/pytorch/multiple-choice/quantization/run_tuning.sh
@@ -45,6 +45,18 @@ function run_tuning {
     elif [ "${topology}" = "bert_base_swag_dynamic" ]; then
         model_name_or_path="ehdwns1516/bert-base-uncased_SWAG"
         approach="PostTrainingDynamic"
+    elif [ "${topology}" = "bert_base_swag_qat" ]; then
+        model_name_or_path="ehdwns1516/bert-base-uncased_SWAG"
+        approach="QuantizationAwareTraining"
+        extra_cmd=$extra_cmd" --learning_rate 1e-5 \
+                   --num_train_epochs 6 \
+                   --eval_steps 100 \
+                   --save_steps 100 \
+                   --greater_is_better True \
+                   --load_best_model_at_end True \
+                   --evaluation_strategy steps \
+                   --save_strategy steps \
+                   --save_total_limit 1"
     fi
 
     python -u ./run_swag.py \
diff --git a/examples/huggingface/pytorch/question-answering/quantization/README.md b/examples/huggingface/pytorch/question-answering/quantization/README.md
index 8b92414e0ee..1054d011ec6 100644
--- a/examples/huggingface/pytorch/question-answering/quantization/README.md
+++ b/examples/huggingface/pytorch/question-answering/quantization/README.md
@@ -76,8 +76,8 @@ python -m torch.distributed.launch --master_addr=<MASTER_ADDRESS> --nproc_per_no
 |Dataset|Pretrained model|PostTrainingDynamic | PostTrainingStatic | QuantizationAwareTraining 
 |---|------------------------------------|---|---|---
 |squad|distilbert-base-uncased-distilled-squad| ✅| ✅| ✅
-|squad|valhalla/longformer-base-4096-finetuned-squadv1| ✅| ✅| 
+|squad|valhalla/longformer-base-4096-finetuned-squadv1| ✅| ✅| N/A
 
 ###  Intel Extension for PyTorch (IPEX) Validated model list
 |Dataset|Pretrained model|Supported IPEX Version 
 |---|------------------------------------|---
diff --git a/examples/huggingface/pytorch/question-answering/quantization/requirements.txt b/examples/huggingface/pytorch/question-answering/quantization/requirements.txt
index 78d3a52e1c3..6d932d02804 100644
--- a/examples/huggingface/pytorch/question-answering/quantization/requirements.txt
+++ b/examples/huggingface/pytorch/question-answering/quantization/requirements.txt
@@ -1,4 +1,4 @@
-datasets >= 1.8.0
-torch >= 1.10.0
-transformers>=4.12.0
+datasets >= 1.1.3
+torch >= 1.10
+transformers
 wandb
diff --git a/examples/huggingface/pytorch/question-answering/quantization/run_benchmark.sh b/examples/huggingface/pytorch/question-answering/quantization/run_benchmark.sh
index 4efc1bd6f23..574ed9b3d0c 100755
--- a/examples/huggingface/pytorch/question-answering/quantization/run_benchmark.sh
+++ b/examples/huggingface/pytorch/question-answering/quantization/run_benchmark.sh
@@ -87,19 +87,18 @@ function run_benchmark {
     elif [ "${topology}" = "longformer_base_squad_static" ]; then
         DATASET_NAME="squad"
         model_name_or_path="valhalla/longformer-base-4096-finetuned-squadv1"
-        approach="PostTrainingStatic"
     elif [ "${topology}" = "longformer_base_squad_dynamic" ]; then
         DATASET_NAME="squad"
         model_name_or_path="valhalla/longformer-base-4096-finetuned-squadv1"
-        approach="PostTrainingDynamic"
     elif [ "${topology}" = "distilbert_base_squad_ipex" ]; then
         DATASET_NAME="squad"
         model_name_or_path="distilbert-base-uncased-distilled-squad"
-        approach="PostTrainingStatic"
     elif [ "${topology}" = "bert_large_squad_ipex" ]; then
         DATASET_NAME="squad"
         model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad"
-        approach="PostTrainingStatic"
+    elif [ "${topology}" = "distilbert_base_squad_qat" ]; then
+        DATASET_NAME="squad"
+        model_name_or_path="distilbert-base-uncased-distilled-squad"
     fi
 
     if [[ ${int8} == "true" ]]; then
diff --git a/examples/huggingface/pytorch/question-answering/quantization/run_qa.py b/examples/huggingface/pytorch/question-answering/quantization/run_qa.py
index 684ee334f36..fc33d5fbefd 100644
--- a/examples/huggingface/pytorch/question-answering/quantization/run_qa.py
+++ b/examples/huggingface/pytorch/question-answering/quantization/run_qa.py
@@ -632,6 +632,9 @@ def post_processing_function(examples, features, predictions, stage="eval"):
     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)
 
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name
+
     # Initialize our Trainer
     trainer = QuestionAnsweringTrainer(
         model=model,
@@ -645,7 +648,6 @@ def compute_metrics(p: EvalPrediction):
         compute_metrics=compute_metrics,
     )
 
-    metric_name = optim_args.metric_name
     calib_dataloader = trainer.get_eval_dataloader()
 
     if optim_args.tune:
diff --git a/examples/huggingface/pytorch/question-answering/quantization/run_tuning.sh b/examples/huggingface/pytorch/question-answering/quantization/run_tuning.sh
index 1e4f0b178bd..02dc6a88075 100644
--- a/examples/huggingface/pytorch/question-answering/quantization/run_tuning.sh
+++ b/examples/huggingface/pytorch/question-answering/quantization/run_tuning.sh
@@ -57,6 +57,19 @@ function run_tuning {
         DATASET_NAME="squad"
         model_name_or_path="distilbert-base-uncased-distilled-squad"
         approach="PostTrainingDynamic"
+    elif [ "${topology}" = "distilbert_base_squad_qat" ]; then
+        DATASET_NAME="squad"
+        model_name_or_path="distilbert-base-uncased-distilled-squad"
+        approach="QuantizationAwareTraining"
+        extra_cmd=$extra_cmd" --learning_rate 1e-5 \
+                   --num_train_epochs 6 \
+                   --eval_steps 100 \
+                   --save_steps 100 \
+                   --greater_is_better True \
+                   --load_best_model_at_end True \
+                   --evaluation_strategy steps \
+                   --save_strategy steps \
+                   --save_total_limit 1"
     elif [ "${topology}" = "bert_large_SQuAD_static" ]; then
         DATASET_NAME="squad"
         model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad"
diff --git a/examples/huggingface/pytorch/summarization/quantization/run_summarization.py b/examples/huggingface/pytorch/summarization/quantization/run_summarization.py
index a9ddd8cdc1e..a53008aa863 100755
--- a/examples/huggingface/pytorch/summarization/quantization/run_summarization.py
+++ b/examples/huggingface/pytorch/summarization/quantization/run_summarization.py
@@ -669,6 +669,9 @@ def compute_metrics(eval_preds):
         result = {k: round(v, 4) for k, v in result.items()}
         return result
 
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name
+
     # Initialize our Trainer
     trainer = NLPSeq2SeqTrainer(
         model=model,
@@ -688,7 +691,6 @@ def compute_metrics(eval_preds):
     )
     num_beams = data_args.num_beams if data_args.num_beams is not None else training_args.generation_num_beams
 
-    metric_name = optim_args.metric_name
     if optim_args.tune:
 
         if not training_args.do_eval:
diff --git a/examples/huggingface/pytorch/text-classification/quantization/run_glue.py b/examples/huggingface/pytorch/text-classification/quantization/run_glue.py
index f2704da1eb8..3335258a56d 100644
--- a/examples/huggingface/pytorch/text-classification/quantization/run_glue.py
+++ b/examples/huggingface/pytorch/text-classification/quantization/run_glue.py
@@ -504,17 +504,6 @@ def compute_metrics(p: EvalPrediction):
     else:
         data_collator = None
 
-    # Initialize our Trainer
-    trainer = NLPTrainer(
-        model=model,
-        args=training_args,
-        train_dataset=train_dataset if training_args.do_train else None,
-        eval_dataset=eval_dataset if training_args.do_eval else None,
-        compute_metrics=compute_metrics,
-        tokenizer=tokenizer,
-        data_collator=data_collator,
-    )
-
     metric_name = (
         optim_args.metric_name
         if optim_args.metric_name is not None
@@ -527,6 +516,18 @@ def compute_metrics(p: EvalPrediction):
             else "accuracy"
         )
     )
+    training_args.metric_for_best_model = metric_name
+
+    # Initialize our Trainer
+    trainer = NLPTrainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset if training_args.do_train else None,
+        eval_dataset=eval_dataset if training_args.do_eval else None,
+        compute_metrics=compute_metrics,
+        tokenizer=tokenizer,
+        data_collator=data_collator,
+    )
 
     if optim_args.tune:
 
diff --git a/examples/huggingface/pytorch/text-generation/quantization/requirements.txt b/examples/huggingface/pytorch/text-generation/quantization/requirements.txt
index fbebe12a27d..6f0cce90cb8 100644
--- a/examples/huggingface/pytorch/text-generation/quantization/requirements.txt
+++ b/examples/huggingface/pytorch/text-generation/quantization/requirements.txt
@@ -3,4 +3,4 @@ datasets >= 2.0
 protobuf
 sentencepiece != 0.1.92
 torch >= 1.10.0
-transformers==4.23.1
\ No newline at end of file
+transformers>=4.23.1
\ No newline at end of file
diff --git a/examples/huggingface/pytorch/token-classification/quantization/requirements.txt b/examples/huggingface/pytorch/token-classification/quantization/requirements.txt
index 898b30094fd..635810f633d 100644
--- a/examples/huggingface/pytorch/token-classification/quantization/requirements.txt
+++ b/examples/huggingface/pytorch/token-classification/quantization/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 seqeval
-datasets >= 1.8.0
+datasets >= 1.1.3
 torch >= 1.10
-transformers>=4.12.0
+transformers
 wandb
diff --git a/examples/huggingface/pytorch/token-classification/quantization/run_benchmark.sh b/examples/huggingface/pytorch/token-classification/quantization/run_benchmark.sh
index d9643bebd0c..19f375298c8 100644
--- a/examples/huggingface/pytorch/token-classification/quantization/run_benchmark.sh
+++ b/examples/huggingface/pytorch/token-classification/quantization/run_benchmark.sh
@@ -69,13 +69,12 @@ function run_benchmark {
     if [ "${topology}" = "distilbert_base_ner_static" ]; then
         DATASET_NAME="conll2003"
         model_name_or_path="elastic/distilbert-base-uncased-finetuned-conll03-english "
-        model_type="bert"
-        approach="PostTrainingStatic"
     elif [ "${topology}" = "distilbert_base_ner_dynamic" ]; then
         DATASET_NAME="conll2003"
         model_name_or_path="elastic/distilbert-base-uncased-finetuned-conll03-english "
-        model_type="bert"
-        approach="PostTrainingDynamic"
+    elif [ "${topology}" = "distilbert_base_ner_qat" ]; then
+        DATASET_NAME="conll2003"
+        model_name_or_path="elastic/distilbert-base-uncased-finetuned-conll03-english "
     fi
 
     if [[ ${int8} == "true" ]]; then
diff --git a/examples/huggingface/pytorch/token-classification/quantization/run_ner.py b/examples/huggingface/pytorch/token-classification/quantization/run_ner.py
index 2395008c922..3fdfedee7a9 100644
--- a/examples/huggingface/pytorch/token-classification/quantization/run_ner.py
+++ b/examples/huggingface/pytorch/token-classification/quantization/run_ner.py
@@ -571,6 +571,8 @@ def compute_metrics(p):
                 "accuracy": results["overall_accuracy"],
             }
 
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name
     # Initialize our Trainer
     trainer = NLPTrainer(
         model=model,
@@ -581,7 +583,6 @@ def compute_metrics(p):
         data_collator=data_collator,
         compute_metrics=compute_metrics,
     )
-    metric_name = optim_args.metric_name
 
     if optim_args.tune:
 
diff --git a/examples/huggingface/pytorch/token-classification/quantization/run_tuning.sh b/examples/huggingface/pytorch/token-classification/quantization/run_tuning.sh
index eea497a0dd7..35616849055 100644
--- a/examples/huggingface/pytorch/token-classification/quantization/run_tuning.sh
+++ b/examples/huggingface/pytorch/token-classification/quantization/run_tuning.sh
@@ -55,6 +55,20 @@ function run_tuning {
         model_name_or_path="elastic/distilbert-base-uncased-finetuned-conll03-english "
         model_type="bert"
         approach="PostTrainingDynamic"
+    elif [ "${topology}" = "distilbert_base_ner_qat" ]; then
+        DATASET_NAME="conll2003"
+        model_name_or_path="elastic/distilbert-base-uncased-finetuned-conll03-english "
+        model_type="bert"
+        approach="QuantizationAwareTraining"
+        extra_cmd=$extra_cmd" --learning_rate 1e-5 \
+                   --num_train_epochs 6 \
+                   --eval_steps 100 \
+                   --save_steps 100 \
+                   --greater_is_better True \
+                   --load_best_model_at_end True \
+                   --evaluation_strategy steps \
+                   --save_strategy steps \
+                   --save_total_limit 1"
     fi
 
     python -u ./run_ner.py \