Commit b662c6b
feat: add generation docker
Add generation docker and some common sense options
2 parents 85937d9 + d29f7eb

15 files changed: +435 −261 lines

Dockerfile renamed to Docker/Evaluate.Dockerfile (+2 −1)

```diff
@@ -10,7 +10,8 @@ RUN pip install --upgrade pip
 # Add a new user "wildcodeuser"
 RUN adduser --disabled-password --gecos "" wildcodeuser
 
-COPY . /wildcode
+# Acquire benchmark code to local
+RUN git clone https://github.com/bigcode-project/code-eval.git /wildcode
 
 RUN cd /wildcode && pip install . && pip install -U -I -r https://raw.githubusercontent.com/bigcode-project/wildcodebench-annotation/main/requirements.txt
 
```
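Because the benchmark code is now cloned during the build instead of COPYed from the build context, the evaluation image can be built from the Dockerfile alone, with no context directory. A minimal sketch, assuming the `codeeval/code-eval-evaluate` name used later in the README (the `latest` tag here is illustrative):

```shell
# Stdin build: no build context is needed, since the Dockerfile
# git-clones the benchmark code rather than COPYing it in
docker build -t codeeval/code-eval-evaluate:latest - < Docker/Evaluate.Dockerfile
```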

Docker/Generate_Cuda11.Dockerfile (new file, +138)

```diff
@@ -0,0 +1,138 @@
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+
+SHELL ["/bin/bash", "-c"]
+
+# Setup Environment Variables
+ENV CUDA_HOME=/usr/local/cuda \
+    PYTHONUNBUFFERED=1 \
+    TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
+# Setup System Utilities
+RUN apt-get update --yes --quiet \
+    && apt-get upgrade --yes --quiet \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
+        apt-utils \
+        autoconf \
+        automake \
+        bc \
+        build-essential \
+        ca-certificates \
+        check \
+        cmake \
+        curl \
+        dmidecode \
+        emacs \
+        g++ \
+        gcc \
+        git \
+        iproute2 \
+        jq \
+        kmod \
+        libaio-dev \
+        libcurl4-openssl-dev \
+        libgl1-mesa-glx \
+        libglib2.0-0 \
+        libgomp1 \
+        libibverbs-dev \
+        libnuma-dev \
+        libnuma1 \
+        libomp-dev \
+        libsm6 \
+        libssl-dev \
+        libsubunit-dev \
+        libsubunit0 \
+        libtool \
+        libxext6 \
+        libxrender-dev \
+        make \
+        moreutils \
+        net-tools \
+        ninja-build \
+        openssh-client \
+        openssh-server \
+        openssl \
+        pkg-config \
+        python3-dev \
+        software-properties-common \
+        sudo \
+        unzip \
+        util-linux \
+        vim \
+        wget \
+        zlib1g-dev \
+    && apt-get autoremove \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/
+
+# Setup base Python to bootstrap Mamba
+RUN add-apt-repository --yes ppa:deadsnakes/ppa \
+    && apt-get update --yes --quiet
+RUN DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
+    python3.11 \
+    python3.11-dev \
+    python3.11-distutils \
+    python3.11-lib2to3 \
+    python3.11-gdbm \
+    python3.11-tk \
+    pip
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 999 \
+    && update-alternatives --config python3 \
+    && ln -s /usr/bin/python3 /usr/bin/python
+RUN pip install --upgrade pip
+
+# Setup optimized Mamba environment with required PyTorch dependencies
+RUN wget -O /tmp/Miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Mambaforge-24.3.0-0-Linux-x86_64.sh \
+    && bash /tmp/Miniforge.sh -b -p /Miniforge \
+    && source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba update -y -q -n base -c defaults mamba \
+    && mamba create -y -q -n Code-Eval python=3.11 setuptools=69.5.1 \
+    && mamba activate Code-Eval \
+    && mamba install -y -q -c conda-forge \
+        charset-normalizer \
+        gputil \
+        ipython \
+        numpy \
+        pandas \
+        scikit-learn \
+        wandb \
+    && mamba install -y -q -c intel \
+        "mkl==2023" \
+        "mkl-static==2023" \
+        "mkl-include==2023" \
+    && mamba install -y -q -c pytorch magma-cuda118 \
+    && mamba clean -a -f -y
+
+# Install VLLM precompiled with appropriate CUDA and ensure PyTorch is installed from the same version channel
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && pip install https://github.com/vllm-project/vllm/releases/download/v0.4.0/vllm-0.4.0+cu118-cp311-cp311-manylinux1_x86_64.whl \
+        --extra-index-url https://download.pytorch.org/whl/cu118
+
+# Install Flash Attention
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && export MAX_JOBS=$(($(nproc) - 2)) \
+    && pip install --no-cache-dir ninja packaging psutil \
+    && pip install flash-attn==2.5.8 --no-build-isolation
+
+# Acquire benchmark code to local
+RUN git clone https://github.com/bigcode-project/code-eval.git /wildcode
+
+# Install Code-Eval and pre-load the dataset
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && pip install wild-code --upgrade \
+    && python -c "from wildcode.data import get_wildcodebench; get_wildcodebench()"
+
+WORKDIR /wildcode
+
+# Declare an argument for the huggingface token
+ARG HF_TOKEN
+RUN if [[ -n "$HF_TOKEN" ]] ; then /Miniforge/envs/Code-Eval/bin/huggingface-cli login --token $HF_TOKEN ; \
+    else echo "No HuggingFace token specified. Access to gated or private models will be unavailable." ; fi
+
+ENTRYPOINT ["/Miniforge/envs/Code-Eval/bin/python", "-m", "wildcode.generate"]
```
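Because the ENTRYPOINT runs `wildcode.generate` under the Code-Eval environment's Python, any arguments after the image name in `docker run` are forwarded directly to the generator. A sketch of building and running this image locally (the tag and flag values are illustrative; the published images are documented in the README changes below):

```shell
# Build the CUDA 11.8 generation image; HF_TOKEN is optional and only
# needed for gated or private HuggingFace models
docker build --build-arg HF_TOKEN=<YOUR_HF_TOKEN> \
    -t codeeval/code-eval-generate-cu11:latest - < Docker/Generate_Cuda11.Dockerfile

# Everything after the image name goes straight to wildcode.generate;
# mount the working directory so the generated .jsonl persists on the host
docker run --gpus "\"device=${CUDA_VISIBLE_DEVICES}\"" -v $(pwd):/wildcode \
    codeeval/code-eval-generate-cu11:latest \
    --model [model_name] --dataset wildcodebench --greedy --backend vllm --tp 1
```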

Docker/Generate_Cuda12.Dockerfile (new file, +138)

```diff
@@ -0,0 +1,138 @@
+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
+
+SHELL ["/bin/bash", "-c"]
+
+# Setup Environment Variables
+ENV CUDA_HOME=/usr/local/cuda \
+    PYTHONUNBUFFERED=1 \
+    TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
+# Setup System Utilities
+RUN apt-get update --yes --quiet \
+    && apt-get upgrade --yes --quiet \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
+        apt-utils \
+        autoconf \
+        automake \
+        bc \
+        build-essential \
+        ca-certificates \
+        check \
+        cmake \
+        curl \
+        dmidecode \
+        emacs \
+        g++ \
+        gcc \
+        git \
+        iproute2 \
+        jq \
+        kmod \
+        libaio-dev \
+        libcurl4-openssl-dev \
+        libgl1-mesa-glx \
+        libglib2.0-0 \
+        libgomp1 \
+        libibverbs-dev \
+        libnuma-dev \
+        libnuma1 \
+        libomp-dev \
+        libsm6 \
+        libssl-dev \
+        libsubunit-dev \
+        libsubunit0 \
+        libtool \
+        libxext6 \
+        libxrender-dev \
+        make \
+        moreutils \
+        net-tools \
+        ninja-build \
+        openssh-client \
+        openssh-server \
+        openssl \
+        pkg-config \
+        python3-dev \
+        software-properties-common \
+        sudo \
+        unzip \
+        util-linux \
+        vim \
+        wget \
+        zlib1g-dev \
+    && apt-get autoremove \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/
+
+# Setup base Python to bootstrap Mamba
+RUN add-apt-repository --yes ppa:deadsnakes/ppa \
+    && apt-get update --yes --quiet
+RUN DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
+    python3.11 \
+    python3.11-dev \
+    python3.11-distutils \
+    python3.11-lib2to3 \
+    python3.11-gdbm \
+    python3.11-tk \
+    pip
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 999 \
+    && update-alternatives --config python3 \
+    && ln -s /usr/bin/python3 /usr/bin/python
+RUN pip install --upgrade pip
+
+# Setup optimized Mamba environment with required PyTorch dependencies
+RUN wget -O /tmp/Miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Mambaforge-24.3.0-0-Linux-x86_64.sh \
+    && bash /tmp/Miniforge.sh -b -p /Miniforge \
+    && source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba update -y -q -n base -c defaults mamba \
+    && mamba create -y -q -n Code-Eval python=3.11 setuptools=69.5.1 \
+    && mamba activate Code-Eval \
+    && mamba install -y -q -c conda-forge \
+        charset-normalizer \
+        gputil \
+        ipython \
+        numpy \
+        pandas \
+        scikit-learn \
+        wandb \
+    && mamba install -y -q -c intel \
+        "mkl==2023" \
+        "mkl-static==2023" \
+        "mkl-include==2023" \
+    && mamba install -y -q -c pytorch magma-cuda121 \
+    && mamba clean -a -f -y
+
+# Install VLLM precompiled with appropriate CUDA and ensure PyTorch is installed from the same version channel
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && pip install https://github.com/vllm-project/vllm/releases/download/v0.4.0/vllm-0.4.0-cp311-cp311-manylinux1_x86_64.whl \
+        --extra-index-url https://download.pytorch.org/whl/cu121
+
+# Install Flash Attention
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && export MAX_JOBS=$(($(nproc) - 2)) \
+    && pip install --no-cache-dir ninja packaging psutil \
+    && pip install flash-attn==2.5.8 --no-build-isolation
+
+# Acquire benchmark code to local
+RUN git clone https://github.com/bigcode-project/code-eval.git /wildcode
+
+# Install Code-Eval and pre-load the dataset
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && pip install wild-code --upgrade \
+    && python -c "from wildcode.data import get_wildcodebench; get_wildcodebench()"
+
+WORKDIR /wildcode
+
+# Declare an argument for the huggingface token
+ARG HF_TOKEN
+RUN if [[ -n "$HF_TOKEN" ]] ; then /Miniforge/envs/Code-Eval/bin/huggingface-cli login --token $HF_TOKEN ; \
+    else echo "No HuggingFace token specified. Access to gated or private models will be unavailable." ; fi
+
+ENTRYPOINT ["/Miniforge/envs/Code-Eval/bin/python", "-m", "wildcode.generate"]
```
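This file differs from the CUDA 11 variant only in the base image (`12.1.1` vs `11.8.0`), the `magma-cuda121` package, the vLLM wheel, and the `cu121` PyTorch index. A quick sanity check that torch and vLLM inside the image agree on the CUDA version might look like the following (the `cu12` tag is hypothetical; `--entrypoint` bypasses the default `wildcode.generate` entrypoint):

```shell
# Hypothetical local tag; prints e.g. "12.1 0.4.0" if the build is consistent
docker run --rm --entrypoint /Miniforge/envs/Code-Eval/bin/python \
    codeeval/code-eval-generate-cu12:latest \
    -c "import torch, vllm; print(torch.version.cuda, vllm.__version__)"
```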

README.md (+29 −5)

````diff
@@ -89,7 +89,7 @@ pip install -U flash-attn
 ```
 
 To generate code samples from a model, you can use the following command:
-
+>
 ```shell
 wildcode.generate \
     --model [model_name] \
@@ -100,11 +100,35 @@ wildcode.generate \
     --temperature [temp] \
     --n_samples [n_samples] \
     --resume \
-    --backend [vllm|hf|openai|mistral|anthropic|google]
+    --backend [vllm|hf|openai|mistral|anthropic|google] \
     --tp [gpu_number]
 ```
-The generated code samples will be stored in a file named `[model_name]--wildcodebench-[nl2c|c2c]--[backend]-[temp]-[n_samples].jsonl`.
-
+>
+The generated code samples will be stored in a file named `[model_name]--wildcodebench-[nl2c|c2c]--[backend]-[temp]-[n_samples].jsonl`. Alternatively, you can use the following command to use our pre-built docker images for generating code samples:
+>
+```shell
+docker run --gpus "\"device=${CUDA_VISIBLE_DEVICES}\"" -v $(pwd):/wildcode -t codeeval/code-eval-generate-cu11:25052024 --model [model_name] \
+    --dataset [wildcodebench] \
+    --nl2code [False|True] \
+    --greedy \
+    --bs [bs] \
+    --temperature [temp] \
+    --n_samples [n_samples] \
+    --resume \
+    --backend [vllm|hf|openai|mistral|anthropic|google] \
+    --tp [gpu_number]
+```
+>
+We make available pre-built docker images for `cuda 11.8.0` and `cuda 12.1.1`, with the corresponding Dockerfiles in the `Docker` directory.
+>
+If you wish to use gated or private HuggingFace models and datasets, you need to build the container yourself with `--build-arg` flags as follows:
+>
+```shell
+docker build --build-arg HF_TOKEN=<YOUR_HF_TOKEN> -t codeeval/code-eval-generate-cu11:latest - < Docker/Generate_Cuda11.Dockerfile
+```
+>
+Following which, you can run the built container as shown above.
+>
 <details><summary>🤔 Structure of `problem`? <i>:: click to expand ::</i></summary>
 <div>
 
@@ -164,7 +188,7 @@ You are strongly recommended to use a sandbox such as [docker](https://docs.dock
 
 ```shell
 # mount the current directory to the container
-docker run -v $(pwd):/wildcode terryzho/wildcode:latest --dataset wildcodebench --samples samples.jsonl
+docker run -v $(pwd):/wildcode codeeval/code-eval-evaluate:latest --dataset wildcodebench --samples samples.jsonl
 # ...Or locally ⚠️
 wildcode.evaluate --dataset wildcodebench --samples samples.jsonl
 ```
````
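Taken together, the new images split the pipeline into a generation container and an evaluation container that share a mounted working directory. A sketch of the full loop under the README's conventions (bracketed placeholders as above; the samples file name follows the documented pattern):

```shell
# 1. Generate samples; the .jsonl output lands in the mounted $PWD
docker run --gpus "\"device=${CUDA_VISIBLE_DEVICES}\"" -v $(pwd):/wildcode \
    codeeval/code-eval-generate-cu11:25052024 \
    --model [model_name] --dataset wildcodebench --greedy --backend vllm --tp 1

# 2. Evaluate the generated samples with the evaluation image
docker run -v $(pwd):/wildcode codeeval/code-eval-evaluate:latest \
    --dataset wildcodebench --samples samples.jsonl
```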
