diff --git a/.gitignore b/.gitignore
index 68bc17f..840dcde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,11 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+# QLoRA
+qlora-out
+last_run_prepared
+sql-model
+
+# Docker
+localstorage
diff --git a/Docker/.gitignore b/Docker/.gitignore
new file mode 100644
index 0000000..0142e1e
--- /dev/null
+++ b/Docker/.gitignore
@@ -0,0 +1 @@
+env.list
\ No newline at end of file
diff --git a/Docker/Dockerfile b/Docker/Dockerfile
new file mode 100644
index 0000000..b9d00e1
--- /dev/null
+++ b/Docker/Dockerfile
@@ -0,0 +1,72 @@
+# Base image
+FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
+
+# Common vars
+ENV ENVNAME=lmhacker
+ENV CURRENTUSER=aieng
+ENV CURRENTUSER_UID=1000
+ENV CURRENTUSER_GID=1000
+## Ensure bash is the default shell
+ENV SHELL=/bin/bash
+
+# Prevent the Ubuntu build from stopping at the time zone selection prompt
+ENV DEBIAN_FRONTEND=noninteractive
+
+################### AS ROOT ###################
+
+# COMMON TOOLS
+RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificates \
+    git time vim curl grep sed tig
+
+# MINICONDA
+RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    /bin/bash ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh && \
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc
+
+RUN /bin/bash -c ". /opt/conda/etc/profile.d/conda.sh && conda update -n base -c defaults conda"
+
+################# NON ROOT ###################
+
+# Add non-root user
+RUN addgroup --gid=$CURRENTUSER_GID $CURRENTUSER && adduser --uid=$CURRENTUSER_UID --gid=$CURRENTUSER_GID --system --shell=/bin/bash $CURRENTUSER
+RUN mkdir /context && chown $CURRENTUSER /context
+USER $CURRENTUSER
+RUN echo "echo '===ENVIRONMENT==='" >> ~/.bashrc
+RUN echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc
+RUN echo "conda activate $ENVNAME" >> ~/.bashrc
+
+# ENVIRONMENT SCRIPT
+WORKDIR /context
+COPY --chown=$CURRENTUSER environment.yml /context/
+USER $CURRENTUSER
+
+# INSTALL ENVIRONMENT
+RUN /bin/bash -c ". /opt/conda/etc/profile.d/conda.sh && conda env create -f environment.yml -n $ENVNAME"
+
+# INSTALL AXOLOTL TO LIBS
+# Clone the repository
+WORKDIR /libs
+RUN git clone https://github.com/OpenAccess-AI-Collective/axolotl
+# Install axolotl
+RUN /bin/bash -c ". /opt/conda/etc/profile.d/conda.sh && conda activate $ENVNAME && \
+    cd axolotl && \
+    pip install -e '.[deepspeed]' && \
+    pip install flash-attn && \
+    pip install -U git+https://github.com/huggingface/peft.git"
+
+# Clean the pip and conda caches
+RUN /bin/bash -c ". /opt/conda/etc/profile.d/conda.sh && conda activate $ENVNAME && conda clean --all -f -y && pip cache purge"
+
+# CONTAINER STARTUP SCRIPT
+WORKDIR /context
+COPY --chown=$CURRENTUSER container_startup.sh /context/
+RUN chmod +x container_startup.sh
+
+USER root
+RUN mkdir -p /home/$CURRENTUSER/.cache/huggingface && chown -R $CURRENTUSER:$CURRENTUSER /home/$CURRENTUSER/.cache/huggingface
+USER $CURRENTUSER
+
+WORKDIR /workspace
+
+CMD ["/bin/bash", "-c", "/context/container_startup.sh"]
diff --git a/Docker/README.docker.md b/Docker/README.docker.md
new file mode 100644
index 0000000..75bc307
--- /dev/null
+++ b/Docker/README.docker.md
@@ -0,0 +1,89 @@
+# Containerized workflow for lm-hackers
+
+## Description
+
+This repository contains Dockerfiles and scripts for setting up and running a containerized development environment with GPU support for `lm-hackers`. The container is fully isolated from the host system but can mount local files for ease of development. This setup is ideal for projects that require specific system configurations and dependencies, including computer vision, deep learning, and computer graphics tasks.
+
+## Prerequisites
+
+- Docker or Podman
+- NVIDIA Container Toolkit (nvidia-docker)
+- NVIDIA GPU (tested with RTX 3090 and 4090)
+
+## Credentials
+
+To run the lm-hackers notebook end-to-end you need to provide valid OpenAI and Hugging Face credentials.
+There are two supported ways to do it:
+
+1) Using a `./Docker/env.list` file like this:
+```
+OPENAI_API_KEY=...
+HUGGINGFACE_TOKEN=...
+```
+
+2) Exporting the `OPENAI_API_KEY` and `HUGGINGFACE_TOKEN` environment variables on the host (see the example below).
+
+NOTE: the credentials are only used at run time; none of them are saved inside the container during the build step.
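+
+For option 2, a minimal example of what that looks like on the host before launching the container (the values are placeholders):
+
+```shell
+export OPENAI_API_KEY=...
+export HUGGINGFACE_TOKEN=...
+./run.sh docker
+```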
+
+## Additional steps
+
+In order to download the Llama 2 models you need to accept the Meta and Hugging Face terms of service. For more information see [https://huggingface.co/meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf).
+
+## Build
+The following step builds the container image. The build step installs conda, creates the environment and installs all the additional libraries (i.e. axolotl, deepspeed, peft). The result is the `lmhacker` image that is used in the next step.
+
+NOTE: the base image is the official NVIDIA CUDA one (`nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04`).
+
+**TO EXECUTE**: From the `./Docker` folder:
++ `./build.sh docker` : to build with docker.
++ `./build.sh podman` : to build with podman.
+
+## Run
+The following step runs the container, mounting the `lm-hackers` repo folder as `/workspace` inside the container and optionally (with the `localstorage` option) mounting the Hugging Face `.cache` to the `localstorage` subfolder of the `lm-hackers` repo.
+
+IMPORTANT: the container runs in a stateless way (with the `--rm` option), so anything saved outside the `/workspace` folder is dropped once the container is stopped. The only exception to this rule is the `localstorage` option of `run.sh`, which persists the Hugging Face data (models, datasets, token).
+
+**TO EXECUTE**: From the `./Docker` folder:
++ `./run.sh docker` : to run with docker.
++ `./run.sh podman` : to run with podman.
++ `./run.sh docker localstorage` : run with docker and mount the Hugging Face cache to `localstorage`.
++ `./run.sh podman localstorage` : run with podman and mount the Hugging Face cache to `localstorage`.
+
+### Container startup
+
+The file `./Docker/container_startup.sh` is executed during container startup (a short summary of what it does is shown below).
+
+NOTE: you need to rebuild the container if you change this file in order to apply the change.
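+
+For reference, the script added in this PR essentially performs the following steps (see `./Docker/container_startup.sh` for the exact version):
+
+```shell
+# activate the conda environment created at build time
+source /opt/conda/etc/profile.d/conda.sh
+conda activate $ENVNAME
+# register the Hugging Face token, then start jupyter bound to all interfaces
+python -c "from huggingface_hub import HfFolder; HfFolder.save_token(\"$HUGGINGFACE_TOKEN\")"
+jupyter notebook --ip=0.0.0.0
+```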
+
+### Launch jupyter
+
+An instance of jupyter is started with the container and the following text is printed in the terminal:
+
+```shell
+    To access the notebook, open this file in a browser:
+        file:///home/aieng/.local/share/jupyter/runtime/nbserver-32-open.html
+    Or copy and paste one of these URLs:
+        http://e44039fd787e:8888/?token=c107ed326c873303a1bee4329435ceb281e2cbd721a960a0
+     or http://127.0.0.1:8888/?token=c107ed326c873303a1bee4329435ceb281e2cbd721a960a0
+```
+
+To access it, open the last link (the `http://127.0.0.1:8888/?token=...` one) in a browser.
+
+### Running jupyter on a different port than 8888
+
+If you need to run jupyter on a different port (e.g. 8889), open `run.sh`, set `LOCAL_PORT=8889` and run it as usual; then, to connect, replace `8888` with `8889` in the access link, as in this example: `http://127.0.0.1:8889/?token=.....`
+
+
+## Development Workflow
+
+### Transient Dependencies
+
+When you're testing or experimenting, you often find yourself installing new packages or tools. As in environments like Kaggle or Colab notebooks, any packages you install this way are transient: they are removed once you restart the container. If you want certain packages to persist, add them to the `environment.yml` file and then rebuild the container, as sketched below.
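+
+For example, to make a new package permanent the workflow is: add it to `environment.yml`, then rebuild and rerun the image. From the `./Docker` folder:
+
+```shell
+# 1) edit ../environment.yml and add the package under its conda or pip dependencies
+# 2) rebuild the image and start a fresh container
+./build.sh docker
+./run.sh docker
+```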
+
+### Root Access and Security
+
+For security reasons, the `sudo` command is not available in the container. However, there are cases where root-level access is necessary, for example when you need to run commands like `apt-get install`. For such scenarios there is a dedicated script, `login_as_root_in_running_container.sh`, which logs you into a running container with root privileges.
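+
+Typical usage, from the `./Docker` folder while the container is running (the installed package is just an example):
+
+```shell
+./login_as_root_in_running_container.sh docker   # or: podman
+# then, inside the container as root:
+apt-get update && apt-get install -y htop
+```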
+
+### Making Permanent Changes
+
+If you find that a particular package or setting needs to be permanent, add it to either the `environment.yml` or the `Dockerfile` and rebuild the container to apply the change. Any package or setting not made permanent in this manner will be lost when the container restarts.
\ No newline at end of file
diff --git a/Docker/build.sh b/Docker/build.sh
new file mode 100755
index 0000000..aac2415
--- /dev/null
+++ b/Docker/build.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+ENGINE=$1
+IMAGE_TAG=lmhacker
+
+if [[ "$ENGINE" != "docker" && "$ENGINE" != "podman" ]]; then
+    echo "Invalid argument. Use 'docker' or 'podman'."
+    exit 1
+fi
+
+# We're using a dedicated build context folder so that docker build does not copy the whole workspace folder into the image
+mkdir -p ./context
+cp ./container_startup.sh ./context/
+cp ../environment.yml ./context/
+cp ./nb_memory_fix.py ./context/
+cp ./lorafix.py ./context/ # temporary until HF fixes peft
+
+# Common build args
+BUILD_ARGS="-t $IMAGE_TAG -f ./Dockerfile ./context"
+
+# Build
+if [[ "$ENGINE" == "docker" ]]; then
+    DOCKER_BUILDKIT=1 $ENGINE build $BUILD_ARGS
+else
+    $ENGINE build $BUILD_ARGS
+fi
+
+# Cleanup
+rm -rf ./context
+
+# ./build.sh docker
+# ./build.sh podman
+
diff --git a/Docker/container_startup.sh b/Docker/container_startup.sh
new file mode 100644
index 0000000..f90c6fb
--- /dev/null
+++ b/Docker/container_startup.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# Check needed variables
+[[ -z $OPENAI_API_KEY ]] && echo "⚠️ WARNING: missing variable 'OPENAI_API_KEY'"
+[[ -z $HUGGINGFACE_TOKEN ]] && echo "⚠️ WARNING: missing variable 'HUGGINGFACE_TOKEN'"
+
+source /opt/conda/etc/profile.d/conda.sh
+conda activate $ENVNAME
+
+# Save the Hugging Face token
+python -c "from huggingface_hub import HfFolder; HfFolder.save_token(\"$HUGGINGFACE_TOKEN\")"
+
+jupyter notebook --ip=0.0.0.0
+#bash
diff --git a/Docker/login_as_root_in_running_container.sh b/Docker/login_as_root_in_running_container.sh
new file mode 100755
index 0000000..281bb0b
--- /dev/null
+++ b/Docker/login_as_root_in_running_container.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# NOTE: this script is meant to be used for any task that requires root privileges inside the running container.
+
+ENGINE=$1
+IMAGE_TAG=lmhacker
+
+# Check arguments
+if [[ "$ENGINE" != "docker" && "$ENGINE" != "podman" ]]; then
+    echo "Invalid argument. Use 'docker' or 'podman'."
+    exit 1
+fi
+
+# Run
+$ENGINE exec -it -u root $($ENGINE ps | grep $IMAGE_TAG | cut -d " " -f1) bash
diff --git a/Docker/run.sh b/Docker/run.sh
new file mode 100755
index 0000000..f0c715c
--- /dev/null
+++ b/Docker/run.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+ENGINE=$1
+OPTIONS=$2
+IMAGE_TAG=lmhacker
+LOCAL_PORT=8888
+
+# Check arguments
+if [[ "$ENGINE" != "docker" && "$ENGINE" != "podman" ]]; then
+    echo "Invalid argument. Use 'docker' or 'podman'."
+    exit 1
+fi
+
+# Support for local storage for downloaded models and datasets
+if [[ "$OPTIONS" == "localstorage" ]]; then
+    LS_ROOT=../localstorage
+    echo "ℹ️ USING LOCAL STORAGE ON: \"$LS_ROOT\""
+    CACHE_HUGGINGFACE=$LS_ROOT/huggingface
+    # create the folder with normal user rights
+    if [[ -v SUDO_USER ]]; then
+        sudo -u $SUDO_USER mkdir -p $CACHE_HUGGINGFACE
+    else
+        mkdir -p $CACHE_HUGGINGFACE
+    fi
+    LOCALSTORAGE=" -v $PWD/$CACHE_HUGGINGFACE:/home/aieng/.cache/huggingface/"
+else
+    LOCALSTORAGE=''
+fi
+
+# Pass keys and tokens
+ENV_FILE="./env.list"
+if [[ -e $ENV_FILE ]]; then
+    echo "TOKENS: USING: $ENV_FILE"
+    ENV_VARS="--env-file $ENV_FILE"
+elif [[ -v OPENAI_API_KEY && -v HUGGINGFACE_TOKEN ]]; then
+    echo "TOKENS: USING ENV VARIABLES"
+    ENV_VARS="--env OPENAI_API_KEY=$OPENAI_API_KEY --env HUGGINGFACE_TOKEN=$HUGGINGFACE_TOKEN"
+else
+    echo "❌ ERROR: you need to provide 'OPENAI_API_KEY' and 'HUGGINGFACE_TOKEN'. You have two options:"
+    echo "1) define both of them as environment variables"
+    echo "2) in this folder create a file '$ENV_FILE' that contains:"
+    echo "OPENAI_API_KEY=..."
+    echo "HUGGINGFACE_TOKEN=..."
+    exit 1
+fi
+
+# Run
+$ENGINE run -it --rm --gpus all --shm-size="2g" -p 127.0.0.1:$LOCAL_PORT:8888 -v "$PWD/..":"/workspace:Z" $LOCALSTORAGE $ENV_VARS $IMAGE_TAG
+
+#$ENGINE run --user 1000:1000 \
+#    --security-opt=no-new-privileges \
+#    --cap-drop=ALL \
+#    --security-opt label=type:nvidia_container_t \
+#    -it \
+#    --gpus all \
+#    --shm-size="2g" \
+#    -p 127.0.0.1:8888:8888 \
+#    -v "$PWD/..":"/workspace:Z" \
+#    --mount type=bind,source=/home/$USER/.cache/huggingface,target=/home/dldev/.cache/huggingface,z \
+#    --env-file $ENV_FILE \
+#    $IMAGE_TAG
+
+# for bash
+#-it --gpus all \
+#-w /workspace # Set the working directory to /workspace
+
+# before running
+#sudo chown -R $USER:$USER /home/$USER/.cache/huggingface/
+#sudo chmod -R 777 /home/$USER/.cache/huggingface/