From 64d2765ea484a2d7056056f870831cd5d0739ed2 Mon Sep 17 00:00:00 2001
From: Manfei <41607353+ManfeiBai@users.noreply.github.com>
Date: Mon, 21 Aug 2023 10:41:44 -0700
Subject: [PATCH] Simplify llama2-model.libsonnet to a single PJRT inference test

---
 tests/pytorch/nightly/llama2-model.libsonnet | 242 +++----------------
 1 file changed, 34 insertions(+), 208 deletions(-)

diff --git a/tests/pytorch/nightly/llama2-model.libsonnet b/tests/pytorch/nightly/llama2-model.libsonnet
index bbe129ab2..9d71f26c9 100644
--- a/tests/pytorch/nightly/llama2-model.libsonnet
+++ b/tests/pytorch/nightly/llama2-model.libsonnet
@@ -19,22 +19,6 @@ local tpus = import 'templates/tpus.libsonnet';
 local utils = import 'templates/utils.libsonnet';
 
 {
-  local llama2_google_next_inference_pretrained_models_eager = self.llama2_google_next_inference_pretrained_models_eager,
-  llama2_google_next_inference_pretrained_models_eager:: common.PyTorchTest {
-    local config = self,
-    modelName: 'l2-e',
-    paramsOverride:: {
-      scriptPath: 'llama/example_text_completion.py',
-      trainCommand: [
-        'torchrun --nproc_per_node 1',
-        self.scriptPath,
-        '--ckpt_dir llama_2_model/llama-2-13b-dummy/',
-        '--tokenizer_path llama_2_model/tokenizer.model',
-        '--max_seq_len 128 --max_batch_size 4',
-      ],
-    },
-    command: self.paramsOverride.trainCommand,
-  },
   local llama2_google_next_inference_pretrained_models = self.llama2_google_next_inference_pretrained_models,
   llama2_google_next_inference_pretrained_models:: common.PyTorchTest {
     local config = self,
@@ -44,207 +28,53 @@ local utils = import 'templates/utils.libsonnet';
       trainCommand: [
         'python3',
         self.scriptPath,
-        '--ckpt_dir 7B/',
+        '--ckpt_dir llama/7B/',
         '--tokenizer_path spiece.model',
         '--max_seq_len 128 --max_batch_size 4',
       ],
     },
     command: self.paramsOverride.trainCommand,
   },
-  local llama2_google_next_inference_fine_tuned_chat_models_eager = self.llama2_google_next_inference_fine_tuned_chat_models_eager,
-  llama2_google_next_inference_fine_tuned_chat_models_eager:: common.PyTorchTest {
-    local config = self,
-    modelName: 'l2-c-e',
-    paramsOverride:: {
-      scriptPath: 'llama/example_chat_completion.py',
-      trainCommand: [
-        'torchrun --nproc_per_node 1',
-        self.scriptPath,
-        '--ckpt_dir llama_2_model/llama-2-13b-dummy/',
-        '--tokenizer_path llama_2_model/tokenizer.model',
-        '--max_seq_len 512 --max_batch_size 4',
-      ],
-    },
-    command: self.paramsOverride.trainCommand,
-  },
-  local llama2_google_next_inference_fine_tuned_chat_models = self.llama2_google_next_inference_fine_tuned_chat_models,
-  llama2_google_next_inference_fine_tuned_chat_models:: common.PyTorchTest {
-    local config = self,
-    modelName: 'l2-c',
-    paramsOverride:: {
-      scriptPath: 'llama/example_chat_completion.py',
-      trainCommand: [
-        'python3',
-        self.scriptPath,
-        '--ckpt_dir llama_2_model/llama-2-13b-dummy/',
-        '--tokenizer_path llama_2_model/tokenizer.model',
-        '--max_seq_len 512 --max_batch_size 4',
-        '--mp True --dynamo True',
-      ],
-    },
-    command: self.paramsOverride.trainCommand,
-  },
-  local llama2_stable_tokenizer = self.llama2_stable_tokenizer,
-  llama2_stable_tokenizer:: common.PyTorchTest {
-    local config = self,
-    modelName: 'l2-t',
-    paramsOverride:: {
-      scriptPath: 'example.py',
-      trainCommand: [
-        'python3',
-        self.scriptPath,
-        '--ckpt_dir llama_2_model/llama-2-13b-dummy/',
-        '--tokenizer_path llama_2_model/tokenizer.model',
-        '--mp True --dynamo True',
-      ],
-    },
-    command: self.paramsOverride.trainCommand,
-  },
-  local llama2_stable_quant = self.llama2_stable_quant,
-  llama2_stable_quant:: common.PyTorchTest {
-    local config = self,
-    modelName: 'l2-q',
-    paramsOverride:: {
-      scriptPath: 'example_xla.py',
-      trainCommand: [
-        'python3',
-        self.scriptPath,
-        '--tokenizer_path llama_2_model/tokenizer.model',
-        '--ckpt_dir llama_2_model/tokenizer.model',
-        '--max_seq_len 256',
-        '--max_batch_size 1',
-        '--temperature 0.8',
-        '--mp True --dynamo True',
-      ],
-    },
-    command: self.paramsOverride.trainCommand,
-  },
-  local llama2_stable_quant_without_download = self.llama2_stable_quant_without_download,
-  llama2_stable_quant_without_download:: common.PyTorchTest {
-    local config = self,
-    modelName: 'l2-q-w',
-    paramsOverride:: {
-      scriptPath: 'example_xla.py',
-      trainCommand: [
-        'python3',
-        self.scriptPath,
-        '--tokenizer_path llama_2_model/tokenizer.model',
-        '--max_seq_len 256',
-        '--max_batch_size 1',
-        '--temperature 0.8',
-        '--dim 4096',
-        '--n_heads 32',
-        '--n_layers 32',
-        '--mp True',
-      ],
-    },
-    command: self.paramsOverride.trainCommand,
-  },
-  local llama2_google_next_inference = self.llama2_google_next_inference,
-  llama2_google_next_inference:: common.PyTorchTpuVmMixin {
+  local pjrt = self.pjrt,
+  pjrt:: common.PyTorchTpuVmMixin {
     modelName+: '-n-i',
     tpuSettings+: {
+      tpuVmExports+: |||
+        export LD_LIBRARY_PATH=/usr/local/lib/
+        export PT_XLA_DEBUG=0
+        export USE_TORCH=ON
+      |||,
       tpuVmExtraSetup: |||
-        sudo apt update 
-        sudo apt-get -y install libopenblas-dev
-        pip install accelerate -U
-        sudo apt update
-        sudo apt-get -y install libopenblas-dev
-        pip3 uninstall -y torch torch_xla
-        ## pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly-cp38-cp38-linux_x86_64.whl
-        ## pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp38-cp38-linux_x86_64.whl
-        pip3 install https://storage.googleapis.com/tpu-pytorch/wheels/tpuvm/torch-nightly+20230422-cp38-cp38-linux_x86_64.whl
-        pip3 install https://storage.googleapis.com/tpu-pytorch/wheels/cuda/118/torch_xla-nightly+20230422-cp38-cp38-linux_x86_64.whl
-        pip3 install https://storage.googleapis.com/tpu-pytorch/wheels/cuda/118/torchvision-nightly+20230422-cp38-cp38-linux_x86_64.whl
-        pip3 uninstall -y libtpu-nightly
-        pip3 install torch_xla[tpuvm] --user
-        # show current path
-        pwd
-        ## /home/xl-ml-test
-        ls
-        ## pytorch snap
-        git clone -b llama2-google-next-inference https://github.com/pytorch-tpu/llama.git
+        pip3 uninstall torch torch_xla torchvision libtpu-nightly -y
+        sudo apt-get update -y
+        sudo apt-get install libomp5 -y
+        pip3 install mkl mkl-include
+        pip3 install tf-nightly tb-nightly tbp-nightly
+        pip3 install numpy
+        sudo apt-get install numactl -y
+        sudo apt-get install libopenblas-dev -y
+        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly+20230726-cp38-cp38-linux_x86_64.whl
+        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly+20230726-cp38-cp38-linux_x86_64.whl
+        pip3 install torch_xla[tpuvm]
+
+        # install tokenizer model
+        wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/llama/spiece.model
+
+        # git clone and build llama
+        git clone --branch llama2-google-next-inference https://github.com/pytorch-tpu/llama.git
         cd llama
-        # show current path
-        pwd
-        ## /home/xl-ml-test/llama
-        ls
-        ## CODE_OF_CONDUCT.md   CONTRIBUTING.md   LICENSE MODEL_CARD.md   README.md   Responsible-Use-Guide.pdf   USE_POLICY.md   download.sh   example_chat_completion.py
-        ## example_text_completion.py   llama   requirements.txt   reshard_checkpoints.py   setup.py
-        pip list | grep torch
-        ## torch                    2.1.0
-        ## torch-xla                2.1.0
-        ## torchvision              0.16.0.dev20230817+cpu
-        pip install -r requirements.txt
-        pip install -e .
-        pip list | grep torch
-        ## torch                    2.1.0
-        ## torch-xla                2.1.0
-        ## torchvision              0.16.0.dev20230817+cpu
-        # prepare data
-        # show current path
-        pwd
-        ## /home/xl-ml-test/llama
-        ls
-        ## CODE_OF_CONDUCT.md   CONTRIBUTING.md   LICENSE   MODEL_CARD.md   README.md   Responsible-Use-Guide.pdf   USE_POLICY.md   download.sh   example_chat_completion.py
-        ## example_text_completion.py   llama   llama.egg-info   requirements.txt   reshard_checkpoints.py   setup.py
-        # --- wget https://storage.mtls.cloud.google.com/manfei_bucket/LLaMA2/llama_2_model.zip
-        # show current path
-        pwd
-        ## /home/xl-ml-test/llama
-        ls
-        ## CODE_OF_CONDUCT.md   CONTRIBUTING.md   LICENSE   MODEL_CARD.md   README.md   Responsible-Use-Guide.pdf   USE_POLICY.md   download.sh   example_chat_completion.py
-        ## example_text_completion.py   llama   llama.egg-info   llama_2_model.zip   requirements.txt   reshard_checkpoints.py   setup.py
-        # --- sudo apt-get install unzip
-        # --- unzip llama_2_model.zip
-        ## wget -nv -O llama_2_model.zip https://storage.mtls.cloud.google.com/manfei_bucket/LLaMA2/llama_2_model.zip
-        ## unzip -o llama_2_model.zip
-        ## 7B
-        gsutil cp gs://tpu-pytorch/lsiyuan-experiment/llama/spiece.model .
+        pip3 install -r requirements.txt
+        pip3 install -e .
+
+        # 7B config
         mkdir 7B
         cd 7B/
         echo -e '{"dim": 4096, "multiple_of": 256, "n_heads": 32, "n_layers": 32, "norm_eps": 1e-05, "vocab_size": -1}' >> params.json
-        ## unzip:  cannot find zipfile directory in one of llama_2_model.zip or llama_2_model.zip.zip, and cannot find llama_2_model.zip.ZIP, period.
-        # show current path
+        cat params.json
         pwd
-        ## /home/xl-ml-test/llama
-        ls
-        ## CODE_OF_CONDUCT.md   CONTRIBUTING.md   LICENSE   MODEL_CARD.md   README.md   Responsible-Use-Guide.pdf   USE_POLICY.md   download.sh   example_chat_completion.py
-        ## example_text_completion.py   llama   llama.egg-info   llama_2_model.zip   requirements.txt   reshard_checkpoints.py   setup.py
-      |||,
-    },
-  },
-  local stable = self.stable,
-  stable:: common.PyTorchTpuVmMixin {
-    modelName+: '-s',
-    tpuSettings+: {
-      tpuVmExtraSetup: |||
-        git clone -b stable https://github.com/pytorch-tpu/llama.git
-        cd llama
-        pip install -r requirements.txt
-        pip install -e .
-        # prepare data
-        wget https://storage.mtls.cloud.google.com/manfei_bucket/LLaMA2/llama_2_model.zip
-        sudo apt-get install unzip
-        unzip llama_2_model.zip
-      |||,
-    },
-  },
-  local xla = self.xla,
-  xla:: common.PyTorchTpuVmMixin {
-    modelName+: '-xla',
-    tpuSettings+: {
-      tpuVmExtraSetup: |||
-        sudo apt update 
-        sudo apt-get -y install libopenblas-dev
-        pip install accelerate -U
-        sudo apt update
-        sudo apt-get -y install libopenblas-dev
-        pip3 uninstall -y torch torch_xla
-        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly-cp310-cp310-linux_x86_64.whl
-        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl
-        pip3 uninstall -y libtpu-nightly
-        pip3 install torch_xla[tpuvm] --user
+
+        # set up env
+        echo 'export PATH=~/.local/bin:$PATH' >> ~/.bash_profile
       |||,
     },
   },
@@ -255,10 +85,6 @@ local utils = import 'templates/utils.libsonnet';
   },
 
   configs: [
-    llama2_google_next_inference_pretrained_models + v4_8 + common.Functional + timeouts.Hours(3) + llama2_google_next_inference,
-    // llama2_google_next_inference_fine_tuned_chat_models + v4_8 + common.Functional + timeouts.Hours(3) + llama2_google_next_inference + xla,
-    llama2_stable_tokenizer + v4_8 + common.Functional + timeouts.Hours(3) + stable + xla,
-    llama2_stable_quant + v4_8 + common.Functional + timeouts.Hours(3) + stable + xla,
-    llama2_stable_quant_without_download + v4_8 + common.Functional + timeouts.Hours(3) + stable + xla,
+    llama2_google_next_inference_pretrained_models + v4_8 + common.Functional + timeouts.Hours(3) + pjrt,
   ],
 }