From 56d5ea7795b56b71b1cad6673edeee9d05c12b9b Mon Sep 17 00:00:00 2001
From: Manfei <41607353+ManfeiBai@users.noreply.github.com>
Date: Fri, 25 Aug 2023 12:55:56 -0700
Subject: [PATCH] Add LLaMA2 model to pt nightly tests (#965)

* Add LLaMA2 model to pt nightly tests
* Update targets.jsonnet adding LLaMA2
* Update llama2-model.libsonnet with data
* Update llama2-model.libsonnet format
* Update targets.jsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet
* add `latency/token` as a threshold for llama2 test
* Update llama2-model.libsonnet with `latency/token` threshold
* Update llama2-model.libsonnet
* Update llama2-model.libsonnet with training
---
 tests/pytorch/nightly/llama2-model.libsonnet | 193 +++++++++++++++++++
 tests/pytorch/nightly/targets.jsonnet        |   2 +
 2 files changed, 195 insertions(+)
 create mode 100644 tests/pytorch/nightly/llama2-model.libsonnet

diff --git a/tests/pytorch/nightly/llama2-model.libsonnet b/tests/pytorch/nightly/llama2-model.libsonnet
new file mode 100644
index 000000000..b4a6e2d77
--- /dev/null
+++ b/tests/pytorch/nightly/llama2-model.libsonnet
@@ -0,0 +1,193 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+local experimental = import '../experimental.libsonnet';
+local common = import 'common.libsonnet';
+local timeouts = import 'templates/timeouts.libsonnet';
+local tpus = import 'templates/tpus.libsonnet';
+local utils = import 'templates/utils.libsonnet';
+
+{
+  local llama2_inference = self.llama2_inference,
+  llama2_inference:: common.PyTorchTest {
+    local config = self,
+    modelName: 'l2-i',
+    paramsOverride:: {
+      scriptPath: 'llama/7B/llama2inference.sh',
+      trainCommand: [
+        'bash',
+        self.scriptPath,
+      ],
+    },
+    command: self.paramsOverride.trainCommand,
+  },
+  local llama2_training = self.llama2_training,
+  llama2_training:: common.PyTorchTest {
+    local config = self,
+    modelName: 'l2-t',
+    paramsOverride:: {
+      scriptPath: 'llama/transformers/7B/llama2training.sh',
+      trainCommand: [
+        'bash',
+        self.scriptPath,
+      ],
+    },
+    command: self.paramsOverride.trainCommand,
+  },
+  local pjrt = self.pjrt,
+  pjrt:: common.PyTorchTpuVmMixin {
+    modelName+: '-n-i',
+    tpuSettings+: {
+      tpuVmExtraSetup: |||
+        pip3 uninstall torch torch_xla torchvision libtpu-nightly -y
+        sudo apt-get update -y
+        sudo apt-get install libomp5 -y
+        pip3 install mkl mkl-include
+        pip3 install tf-nightly tb-nightly tbp-nightly
+        pip3 install numpy
+        sudo apt-get install numactl -y
+        sudo apt-get install libopenblas-dev -y
+        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly+20230821-cp310-cp310-linux_x86_64.whl
+        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly+20230821-cp310-cp310-linux_x86_64.whl
+        pip3 install torch_xla[tpuvm]
+
+        # install tokenizer model
+        wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/llama/spiece.model
+
+        # git clone and build llama
+        git clone --branch llama2-google-next-inference https://github.com/pytorch-tpu/llama.git
+        cd llama
+        pip3 install -r requirements.txt
+        pip3 install -e .
+
+        # 7B config
+        mkdir 7B
+        cd 7B/
+        echo -e '{"dim": 4096, "multiple_of": 256, "n_heads": 32, "n_layers": 32, "norm_eps": 1e-05, "vocab_size": -1}' >> params.json
+
+        # save llama2 test
+        echo -e 'python3 llama/example_text_completion.py True "/home/xl-ml-test/llama/7B" /home/xl-ml-test/spiece.model --max_seq_len=2048 --max_gen_len=1000 --max_batch_size=2 --dynamo=True > output.txt' >> llama2inference.sh
+        echo -e 'file = open("output.txt")' >> getvalue.py
+        echo -e 'content = file.readlines()' >> getvalue.py
+        echo -e 'warm_line = content[-6]' >> getvalue.py
+        echo -e 'warm_value = float((warm_line.split())[5])' >> getvalue.py
+        echo -e 'if warm_value > 7.948752 or warm_value < 7.191728:' >> getvalue.py
+        echo -e '    raise ValueError("warm latency/token exceeded threshold 7.57024 +- 5%")' >> getvalue.py
+        echo -e 'else:' >> getvalue.py
+        echo -e '    print("Finished llama2 test and warm latency/token within expected threshold 7.57024 +- 5%")' >> getvalue.py
+        echo -e 'cat output.txt' >> llama2inference.sh
+        echo -e 'python3 llama/7B/getvalue.py' >> llama2inference.sh
+        cat llama2inference.sh
+      |||,
+    },
+  },
+  local hf = self.hf,
+  hf:: common.PyTorchTpuVmMixin {
+    modelName+: '-h-f',
+    tpuSettings+: {
+      tpuVmExtraSetup: |||
+        pip3 uninstall torch torch_xla torchvision libtpu-nightly -y
+        sudo apt update -y
+        sudo apt-get update -y
+        pip install accelerate -U
+        sudo apt-get install libomp5 -y
+        pip3 install mkl mkl-include
+        pip3 install tf-nightly tb-nightly tbp-nightly
+        pip3 install numpy
+        sudo apt-get install numactl -y
+        sudo apt-get install libopenblas-dev -y
+        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly+20230821-cp310-cp310-linux_x86_64.whl
+        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly+20230821-cp310-cp310-linux_x86_64.whl
+        pip3 install torch_xla[tpuvm]
+
+        # install tokenizer model
+        wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/llama/spiece.model
+
+        # git clone and build llama
+        git clone --branch llama2-google-next-inference https://github.com/pytorch-tpu/llama.git
+        cd llama
+        pip3 install -r requirements.txt
+        pip3 install -e .
+
+        # git clone and build transformers ### llama/transformers/
+        git clone -b lsiyuan/fsdp-data-aug https://github.com/pytorch-tpu/transformers.git
+        cd transformers
+        sudo pip3 uninstall transformers
+        sudo pip3 install -e .
+        pip3 install datasets
+        pip3 install evaluate
+        pip3 install scikit-learn
+        pip3 install accelerate
+        pwd
+        ls
+
+        # 7B config
+        mkdir 7B
+        cd 7B/
+        wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/configs/hf_llama/7B.json
+
+        # save llama2 training
+        echo -e 'python3 -u llama/transformers/examples/pytorch/xla_spawn.py --num_cores 64 llama/transformers/examples/pytorch/language-modeling/run_clm.py --num_train_epochs 2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --do_train --output_dir . --overwrite_output_dir --config_name llama/transformers/7B/7B.json --cache_dir /tmp --tokenizer_name gpt2 --block_size 1024 --optim adafactor --adafactor true --save_strategy no --logging_strategy no' >> llama2training.sh
+        cat llama2training.sh
+        pwd
+        ls
+      |||,
+    },
+  },
+  local hff = self.hff,
+  hff:: common.PyTorchTpuVmMixin {
+    modelName+: '-hff',
+    tpuSettings+: {
+      tpuVmExtraSetup: |||
+        # gsutil cp -r gs://tpu-pytorch/lsiyuan-experiment/configs/hf_llama /tmp/
+
+        # install tokenizer model ### llama/llama2-fsdp/transformers/spiece.model
+        ## wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/llama/spiece.model
+        ## pwd
+        ## ls
+
+        # 7B config ### llama/llama2-fsdp/transformers/7B/
+        ## mkdir 7B
+        ## cd 7B/
+        ## ## echo -e '{"dim": 4096, "multiple_of": 256, "n_heads": 32, "n_layers": 32, "norm_eps": 1e-05, "vocab_size": -1}' >> params.json
+        ## wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/configs/hf_llama/7B.json
+
+        pwd
+        ls
+
+        # save llama2 training ### llama/llama2-fsdp/transformers/7B/llama2training.sh
+        ## echo -e 'export PJRT_DEVICE=TPU' >> llama2training.sh
+        ## echo -e 'export PT_XLA_DEBUG=0' >> llama2training.sh
+        ## echo -e 'export USE_TORCH=ON' >> llama2training.sh
+        ## echo -e 'export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH' >> llama2training.sh
+        ## echo -e 'export PROFILE_LOGDIR=/tmp/llama-7b-bs-8' >> llama2training.sh
+        ## echo -e 'mkdir /tmp/test-clm-llama' >> llama2training.sh
+        echo -e 'python3 -u llama/llama2-fsdp/transformers/examples/pytorch/xla_spawn.py --num_cores 64 llama/llama2-fsdp/transformers/examples/pytorch/language-modeling/run_clm.py --num_train_epochs 2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --do_train --output_dir /tmp/test-clm-llama --overwrite_output_dir --config_name llama/llama2-fsdp/transformers/7B/7B.json --cache_dir /tmp --tokenizer_name gpt2 --block_size 1024 --optim adafactor --adafactor true --save_strategy no --logging_strategy no' >> llama2training.sh
+        cat llama2training.sh
+        pwd
+        ls
+      |||,
+    },
+  },
+
+  local v4_8 = self.v4_8,
+  v4_8:: {
+    accelerator: tpus.v4_8,
+  },
+
+  configs: [
+    llama2_inference + v4_8 + common.Functional + timeouts.Hours(3) + pjrt,
+    llama2_training + v4_8 + common.Functional + timeouts.Hours(3) + hf,
+  ],
+}
diff --git a/tests/pytorch/nightly/targets.jsonnet b/tests/pytorch/nightly/targets.jsonnet
index 46366c200..c9a8e281a 100644
--- a/tests/pytorch/nightly/targets.jsonnet
+++ b/tests/pytorch/nightly/targets.jsonnet
@@ -19,6 +19,7 @@ local hfFsmt = import 'hf-fsmt.libsonnet';
 local huggingfaceGlue = import 'hf-glue.libsonnet';
 local huggingfaceGPT2 = import 'hf-llm.libsonnet';
 local huggingfaceVitMae = import 'hf-mae.libsonnet';
+local llama2 = import 'llama2-model.libsonnet';
 local mnist = import 'mnist.libsonnet';
 local resnet50_mp = import 'resnet50-mp.libsonnet';
 local stableDif = import 'sd-model.libsonnet';
@@ -34,4 +35,5 @@ std.flattenArrays([
   mnist.configs,
   resnet50_mp.configs,
   stableDif.configs,
+  llama2.configs,
 ])