Commit c7e6653

Fixed QAT error for PyTorch model (intel#852)

PenghuiCheng committed Apr 25, 2023
1 parent bdeadda, commit c7e6653

Showing 25 changed files with 284 additions and 48 deletions.
examples/.config/pytorch_optimize.json (126 additions, 0 deletions)

@@ -20,6 +20,27 @@
            }
        }
    },
    "gpt_neo_clm_qat": {
        "working_dir": "huggingface/pytorch/language-modeling/quantization",
        "tune": {
            "cmd": "bash run_tuning.sh",
            "params": {
                "topology": "gpt_neo_clm_qat",
                "output_model": "saved_results"
            }
        },
        "benchmark": {
            "cmd": "bash run_benchmark.sh",
            "params": {
                "topology": "gpt_neo_clm_qat",
                "mode": "accuracy",
                "batch_size": "8",
                "iters": "100",
                "int8": "false",
                "config": "saved_results"
            }
        }
    },
    "bloom_text_static": {
        "working_dir": "huggingface/pytorch/text-generation/quantization",
        "tune": {
@@ -146,6 +167,27 @@
            }
        }
    },
    "bert_mlm_qat": {
        "working_dir": "huggingface/pytorch/language-modeling/quantization",
        "tune": {
            "cmd": "bash run_tuning.sh",
            "params": {
                "topology": "bert_mlm_qat",
                "output_model": "saved_results"
            }
        },
        "benchmark": {
            "cmd": "bash run_benchmark.sh",
            "params": {
                "topology": "bert_mlm_qat",
                "mode": "accuracy",
                "batch_size": "8",
                "iters": "100",
                "int8": "false",
                "config": "saved_results"
            }
        }
    },
    "bert_mlm_dynamic": {
        "working_dir": "huggingface/pytorch/language-modeling/quantization/",
        "tune": {
@@ -188,6 +230,27 @@
            }
        }
    },
    "xlnet_plm_qat": {
        "working_dir": "huggingface/pytorch/language-modeling/quantization/",
        "tune": {
            "cmd": "bash run_tuning.sh",
            "params": {
                "topology": "xlnet_plm_qat",
                "output_model": "saved_results"
            }
        },
        "benchmark": {
            "cmd": "bash run_benchmark.sh",
            "params": {
                "topology": "xlnet_plm_qat",
                "mode": "accuracy",
                "batch_size": "8",
                "iters": "100",
                "int8": "false",
                "config": "saved_results"
            }
        }
    },
    "xlnet_plm_dynamic": {
        "working_dir": "huggingface/pytorch/language-modeling/quantization/",
        "tune": {
@@ -294,6 +357,27 @@
            }
        }
    },
    "bert_base_swag_qat": {
        "working_dir": "huggingface/pytorch/multiple-choice/quantization/",
        "tune": {
            "cmd": "bash run_tuning.sh",
            "params": {
                "topology": "bert_base_swag_qat",
                "output_model": "saved_results"
            }
        },
        "benchmark": {
            "cmd": "bash run_benchmark.sh",
            "params": {
                "topology": "bert_base_swag_qat",
                "mode": "accuracy",
                "batch_size": "64",
                "iters": "100",
                "int8": "false",
                "config": "saved_results"
            }
        }
    },
    "bert_base_swag_dynamic": {
        "working_dir": "huggingface/pytorch/multiple-choice/quantization/",
        "tune": {
@@ -336,6 +420,27 @@
            }
        }
    },
    "distilbert_base_squad_qat": {
        "working_dir": "huggingface/pytorch/question-answering/quantization/",
        "tune": {
            "cmd": "bash run_tuning.sh",
            "params": {
                "topology": "distilbert_base_squad_qat",
                "output_model": "saved_results"
            }
        },
        "benchmark": {
            "cmd": "bash run_benchmark.sh",
            "params": {
                "topology": "distilbert_base_squad_qat",
                "mode": "accuracy",
                "batch_size": "64",
                "iters": "100",
                "int8": "false",
                "config": "saved_results"
            }
        }
    },
    "distilbert_base_squad_dynamic": {
        "working_dir": "huggingface/pytorch/question-answering/quantization/",
        "tune": {
@@ -819,6 +924,27 @@
            }
        }
    },
    "distilbert_base_ner_qat": {
        "working_dir": "huggingface/pytorch/token-classification/quantization/",
        "tune": {
            "cmd": "bash run_tuning.sh",
            "params": {
                "topology": "distilbert_base_ner_qat",
                "output_model": "saved_results"
            }
        },
        "benchmark": {
            "cmd": "bash run_benchmark.sh",
            "params": {
                "topology": "distilbert_base_ner_qat",
                "mode": "accuracy",
                "batch_size": "64",
                "iters": "100",
                "int8": "false",
                "config": "saved_results"
            }
        }
    },
    "distilbert_base_ner_dynamic": {
        "working_dir": "huggingface/pytorch/token-classification/quantization/",
        "tune": {
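Each new *_qat entry follows the same schema as the surrounding post-training entries: a tune stage that writes the quantized model to saved_results, and a benchmark stage that measures accuracy against it. For orientation only, a driver consuming this config could look like the sketch below; the run_entry helper and the --key=value flag convention are illustrative assumptions, not code from this commit.

    import json
    import subprocess

    def run_entry(config_path: str, name: str) -> None:
        # Hypothetical driver: run the tune stage, then the benchmark stage,
        # for one topology entry of pytorch_optimize.json.
        with open(config_path) as f:
            entries = json.load(f)
        entry = entries[name]
        for stage in ("tune", "benchmark"):
            stage_cfg = entry[stage]
            # Every param becomes a --key=value flag for run_tuning.sh / run_benchmark.sh.
            flags = [f"--{k}={v}" for k, v in stage_cfg["params"].items()]
            subprocess.run(stage_cfg["cmd"].split() + flags,
                           cwd=entry["working_dir"], check=True)

    run_entry("examples/.config/pytorch_optimize.json", "gpt_neo_clm_qat")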
========

@@ -25,4 +25,4 @@ run run_benchmark.sh

|Dataset|Pretrained model|PostTrainingDynamic | PostTrainingStatic | QuantizationAwareTraining
|---|------------------------------------|---|---|---
-|imagenet-1k|google/vit-base-patch16-224||| N/|
+|imagenet-1k|google/vit-base-patch16-224||| N/A|
========

@@ -408,6 +408,9 @@ def val_transforms(example_batch):
# Set the validation transforms
dataset["validation"].set_transform(val_transforms)

+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name

    # Initialize our trainer
trainer = NLPTrainer(
model=model,
@@ -418,8 +421,6 @@ def val_transforms(example_batch):
tokenizer=feature_extractor,
data_collator=collate_fn,
)

-    metric_name = optim_args.metric_name

if optim_args.tune:

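The two added lines above are the substance of the fix, repeated across the affected example scripts: optim_args.metric_name is now written into training_args.metric_for_best_model, and this happens before NLPTrainer is constructed. Previously metric_name was only read after the trainer was built and never reached training_args, so QAT runs launched with --load_best_model_at_end had no usable ranking metric. A minimal sketch of the ordering change, assuming NLPTrainer consumes its args the way transformers.Trainer does:

    from transformers import TrainingArguments

    # Flags passed by run_tuning.sh for the QAT topologies.
    args = TrainingArguments(
        output_dir="saved_results",
        evaluation_strategy="steps",
        eval_steps=100,
        save_steps=100,
        load_best_model_at_end=True,
    )

    # Pre-fix order (broken): build the trainer, then read the metric name;
    # training_args.metric_for_best_model was never set.
    #     trainer = NLPTrainer(model=model, args=args, ...)
    #     metric_name = optim_args.metric_name
    # Fixed order (this commit): set the metric on the args, then build the trainer.
    args.metric_for_best_model = "eval_accuracy"  # stand-in for optim_args.metric_name
    #     trainer = NLPTrainer(model=model, args=args, ...)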
========
@@ -1,9 +1,9 @@
accelerate
-datasets >= 1.8.0
protobuf
sentencepiece != 0.1.92
-torch >= 1.10.0
-transformers>=4.12.0
+datasets >= 1.1.3
+torch >= 1.10
+transformers
wandb
neural_compressor
git+https://github.com/EleutherAI/lm-evaluation-harness.git@83dbfbf6070324f3e5872f63e49d49ff7ef4c9b3
========

@@ -135,6 +135,21 @@ function run_benchmark {
script="run_clm.py"
DATASET_NAME="lambada"
model_name_or_path="bigscience/bloom-560m"
elif [ "${topology}" = "gpt_neo_clm_qat" ]; then
script="run_clm.py"
DATASET_NAME="wikitext"
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="EleutherAI/gpt-neo-125M"
elif [ "${topology}" = "bert_mlm_qat" ]; then
script="run_mlm.py"
DATASET_NAME="wikitext"
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="bert-base-uncased"
elif [ "${topology}" = "xlnet_plm_qat" ]; then
script="run_plm.py"
DATASET_NAME="wikitext"
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="xlnet-base-cased"
fi

if [[ ${int8} == "true" ]]; then
========

@@ -541,6 +541,9 @@ def compute_metrics(eval_preds):
preds = preds[:, :-1].reshape(-1)
return metric.compute(predictions=preds, references=labels)

+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name

# Initialize our Trainer
trainer = NLPTrainer(
model=model,
Expand All @@ -555,7 +558,6 @@ def compute_metrics(eval_preds):
if training_args.do_eval and not is_torch_tpu_available()
else None,
)
-    metric_name = optim_args.metric_name

if optim_args.tune:

========

@@ -561,6 +561,8 @@ def compute_metrics(eval_preds):
mlm_probability=data_args.mlm_probability,
pad_to_multiple_of=8 if pad_to_multiple_of_8 else None,
)
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name

# Initialize our Trainer
trainer = NLPTrainer(
Expand All @@ -575,7 +577,6 @@ def compute_metrics(eval_preds):
if training_args.do_eval and not is_torch_tpu_available()
else None,
)
-    metric_name = optim_args.metric_name

if optim_args.tune:

========

@@ -511,6 +511,8 @@ def group_texts(examples):
plm_probability=data_args.plm_probability,
max_span_length=data_args.max_span_length,
)
+    metric_name = optim_args.metric_name
+    training_args.metric_for_best_model = metric_name

# Initialize our Trainer
trainer = NLPTrainer(
Expand All @@ -521,7 +523,6 @@ def group_texts(examples):
tokenizer=tokenizer,
data_collator=data_collator,
)
-    metric_name = optim_args.metric_name

if optim_args.tune:

========

@@ -56,6 +56,21 @@ function run_tuning {
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="EleutherAI/gpt-neo-125M"
approach="PostTrainingDynamic"
elif [ "${topology}" = "gpt_neo_clm_qat" ]; then
script="run_clm.py"
DATASET_NAME="wikitext"
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="EleutherAI/gpt-neo-125M"
approach="QuantizationAwareTraining"
extra_cmd=$extra_cmd" --learning_rate 1e-5 \
--num_train_epochs 6 \
--eval_steps 100 \
--save_steps 100 \
--greater_is_better True \
--load_best_model_at_end True \
--evaluation_strategy steps \
--save_strategy steps \
--save_total_limit 1"
elif [ "${topology}" = "gptj_clm_static" ]; then
script="run_clm.py"
DATASET_NAME="wikitext"
@@ -79,6 +94,22 @@ function run_tuning {
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="bert-base-uncased"
approach="PostTrainingStatic"
elif [ "${topology}" = "bert_mlm_qat" ]; then
script="run_mlm.py"
DATASET_NAME="wikitext"
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="bert-base-uncased"
approach="QuantizationAwareTraining"
extra_cmd=$extra_cmd" --learning_rate 1e-5 \
--num_train_epochs 6 \
--eval_steps 100 \
--save_steps 100 \
--greater_is_better True \
--load_best_model_at_end True \
--evaluation_strategy steps \
--save_strategy steps \
--metric_for_best_model accuracy \
--save_total_limit 1"
elif [ "${topology}" = "bert_mlm_dynamic" ]; then
script="run_mlm.py"
DATASET_NAME="wikitext"
@@ -91,6 +122,22 @@ function run_tuning {
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="xlnet-base-cased"
approach="PostTrainingStatic"
elif [ "${topology}" = "xlnet_plm_qat" ]; then
script="run_plm.py"
DATASET_NAME="wikitext"
DATASET_CONFIG_NAME="wikitext-2-raw-v1"
model_name_or_path="xlnet-base-cased"
approach="QuantizationAwareTraining"
extra_cmd=$extra_cmd" --learning_rate 1e-5 \
--num_train_epochs 6 \
--eval_steps 100 \
--save_steps 100 \
--greater_is_better True \
--load_best_model_at_end True \
--evaluation_strategy steps \
--save_strategy steps \
--metric_for_best_model accuracy \
--save_total_limit 1"
elif [ "${topology}" = "xlnet_plm_dynamic" ]; then
script="run_plm.py"
DATASET_NAME="wikitext"
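Each QAT branch above passes the same training flags to its example script. Mapped onto transformers.TrainingArguments, that flag set resolves to roughly the following; this is a sketch for orientation, not code from this commit (note that the gpt_neo_clm_qat branch omits --metric_for_best_model and relies on the script's default):

    from transformers import TrainingArguments

    qat_args = TrainingArguments(
        output_dir="saved_results",
        learning_rate=1e-5,
        num_train_epochs=6,
        evaluation_strategy="steps",  # evaluate every eval_steps
        eval_steps=100,
        save_strategy="steps",        # checkpoint every save_steps
        save_steps=100,
        save_total_limit=1,           # keep a single checkpoint on disk
        load_best_model_at_end=True,  # restore the best checkpoint after QAT
        metric_for_best_model="accuracy",
        greater_is_better=True,
    )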
========

@@ -2,5 +2,5 @@ datasets >= 1.1.3
sentencepiece != 0.1.92
protobuf
torch >= 1.10.0
-transformers >=4.12.0
+transformers
wandb
========

@@ -56,10 +56,10 @@ function run_benchmark {

        if [ "${topology}" = "bert_base_swag_static" ]; then
            model_name_or_path="ehdwns1516/bert-base-uncased_SWAG"
-           approach="PostTrainingStatic"
        elif [ "${topology}" = "bert_base_swag_dynamic" ]; then
            model_name_or_path="ehdwns1516/bert-base-uncased_SWAG"
-           approach="PostTrainingDynamic"
+       elif [ "${topology}" = "bert_base_swag_qat" ]; then
+           model_name_or_path="ehdwns1516/bert-base-uncased_SWAG"
        fi

if [[ ${mode} == "accuracy" ]]; then
========

@@ -448,6 +448,13 @@ def compute_metrics(eval_predictions):
preds = np.argmax(predictions, axis=1)
return {"accuracy": (preds == label_ids).astype(np.float32).mean().item()}

+    metric_name = (
+        optim_args.metric_name
+        if optim_args.metric_name is not None
+        else "eval_accuracy"
+    )
+    training_args.metric_for_best_model = metric_name

# Initialize our Trainer
trainer = NLPTrainer(
model=model,
@@ -459,12 +466,6 @@
compute_metrics=compute_metrics,
)

-    metric_name = (
-        optim_args.metric_name
-        if optim_args.metric_name is not None
-        else "eval_accuracy"
-    )

if optim_args.tune:

if not training_args.do_eval: