Commit b662c6b
feat: add generation docker
Add generation docker and some common sense options
2 parents 85937d9 + d29f7eb

15 files changed: +435 −261 lines

Dockerfile renamed to Docker/Evaluate.Dockerfile (+2 −1)

```diff
@@ -10,7 +10,8 @@ RUN pip install --upgrade pip
 # Add a new user "wildcodeuser"
 RUN adduser --disabled-password --gecos "" wildcodeuser
 
-COPY . /wildcode
+# Acquire benchmark code to local
+RUN git clone https://github.com/bigcode-project/code-eval.git /wildcode
 
 RUN cd /wildcode && pip install . && pip install -U -I -r https://raw.githubusercontent.com/bigcode-project/wildcodebench-annotation/main/requirements.txt
 
```
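Because the benchmark code is now cloned during the build instead of COPYed from the build context, the evaluation image can be built from the Dockerfile alone, with no context directory. A minimal sketch, assuming the `codeeval/code-eval-evaluate` name used later in the README (the `latest` tag here is illustrative):

```shell
# Stdin build: no build context is needed, since the Dockerfile
# git-clones the benchmark code rather than COPYing it in
docker build -t codeeval/code-eval-evaluate:latest - < Docker/Evaluate.Dockerfile
```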

Docker/Generate_Cuda11.Dockerfile (new file, +138)

```diff
@@ -0,0 +1,138 @@
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+
+SHELL ["/bin/bash", "-c"]
+
+# Setup Environment Variables
+ENV CUDA_HOME=/usr/local/cuda \
+    PYTHONUNBUFFERED=1 \
+    TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
+# Setup System Utilities
+RUN apt-get update --yes --quiet \
+    && apt-get upgrade --yes --quiet \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
+        apt-utils \
+        autoconf \
+        automake \
+        bc \
+        build-essential \
+        ca-certificates \
+        check \
+        cmake \
+        curl \
+        dmidecode \
+        emacs \
+        g++ \
+        gcc \
+        git \
+        iproute2 \
+        jq \
+        kmod \
+        libaio-dev \
+        libcurl4-openssl-dev \
+        libgl1-mesa-glx \
+        libglib2.0-0 \
+        libgomp1 \
+        libibverbs-dev \
+        libnuma-dev \
+        libnuma1 \
+        libomp-dev \
+        libsm6 \
+        libssl-dev \
+        libsubunit-dev \
+        libsubunit0 \
+        libtool \
+        libxext6 \
+        libxrender-dev \
+        make \
+        moreutils \
+        net-tools \
+        ninja-build \
+        openssh-client \
+        openssh-server \
+        openssl \
+        pkg-config \
+        python3-dev \
+        software-properties-common \
+        sudo \
+        unzip \
+        util-linux \
+        vim \
+        wget \
+        zlib1g-dev \
+    && apt-get autoremove \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/
+
+# Setup base Python to bootstrap Mamba
+RUN add-apt-repository --yes ppa:deadsnakes/ppa \
+    && apt-get update --yes --quiet
+RUN DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
+    python3.11 \
+    python3.11-dev \
+    python3.11-distutils \
+    python3.11-lib2to3 \
+    python3.11-gdbm \
+    python3.11-tk \
+    pip
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 999 \
+    && update-alternatives --config python3 \
+    && ln -s /usr/bin/python3 /usr/bin/python
+RUN pip install --upgrade pip
+
+# Setup optimized Mamba environment with required PyTorch dependencies
+RUN wget -O /tmp/Miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Mambaforge-24.3.0-0-Linux-x86_64.sh \
+    && bash /tmp/Miniforge.sh -b -p /Miniforge \
+    && source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba update -y -q -n base -c defaults mamba \
+    && mamba create -y -q -n Code-Eval python=3.11 setuptools=69.5.1 \
+    && mamba activate Code-Eval \
+    && mamba install -y -q -c conda-forge \
+        charset-normalizer \
+        gputil \
+        ipython \
+        numpy \
+        pandas \
+        scikit-learn \
+        wandb \
+    && mamba install -y -q -c intel \
+        "mkl==2023" \
+        "mkl-static==2023" \
+        "mkl-include==2023" \
+    && mamba install -y -q -c pytorch magma-cuda118 \
+    && mamba clean -a -f -y
+
+# Install VLLM precompiled with appropriate CUDA and ensure PyTorch is installed from the same version channel
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && pip install https://github.com/vllm-project/vllm/releases/download/v0.4.0/vllm-0.4.0+cu118-cp311-cp311-manylinux1_x86_64.whl \
+        --extra-index-url https://download.pytorch.org/whl/cu118
+
+# Install Flash Attention
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && export MAX_JOBS=$(($(nproc) - 2)) \
+    && pip install --no-cache-dir ninja packaging psutil \
+    && pip install flash-attn==2.5.8 --no-build-isolation
+
+# Acquire benchmark code to local
+RUN git clone https://github.com/bigcode-project/code-eval.git /wildcode
+
+# Install Code-Eval and pre-load the dataset
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && pip install wild-code --upgrade \
+    && python -c "from wildcode.data import get_wildcodebench; get_wildcodebench()"
+
+WORKDIR /wildcode
+
+# Declare an argument for the huggingface token
+ARG HF_TOKEN
+RUN if [[ -n "$HF_TOKEN" ]] ; then /Miniforge/envs/Code-Eval/bin/huggingface-cli login --token $HF_TOKEN ; \
+    else echo "No HuggingFace token specified. Access to gated or private models will be unavailable." ; fi
+
+ENTRYPOINT ["/Miniforge/envs/Code-Eval/bin/python", "-m", "wildcode.generate"]
```
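Because the ENTRYPOINT runs `wildcode.generate` under the Code-Eval environment's Python, any arguments after the image name in `docker run` are forwarded directly to the generator. A sketch of building and running this image locally (the tag and flag values are illustrative; the published images are documented in the README changes below):

```shell
# Build the CUDA 11.8 generation image; HF_TOKEN is optional and only
# needed for gated or private HuggingFace models
docker build --build-arg HF_TOKEN=<YOUR_HF_TOKEN> \
    -t codeeval/code-eval-generate-cu11:latest - < Docker/Generate_Cuda11.Dockerfile

# Everything after the image name goes straight to wildcode.generate;
# mount the working directory so the generated .jsonl persists on the host
docker run --gpus "\"device=${CUDA_VISIBLE_DEVICES}\"" -v $(pwd):/wildcode \
    codeeval/code-eval-generate-cu11:latest \
    --model [model_name] --dataset wildcodebench --greedy --backend vllm --tp 1
```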

Docker/Generate_Cuda12.Dockerfile (new file, +138)

```diff
@@ -0,0 +1,138 @@
+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
+
+SHELL ["/bin/bash", "-c"]
+
+# Setup Environment Variables
+ENV CUDA_HOME=/usr/local/cuda \
+    PYTHONUNBUFFERED=1 \
+    TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
+# Setup System Utilities
+RUN apt-get update --yes --quiet \
+    && apt-get upgrade --yes --quiet \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
+        apt-utils \
+        autoconf \
+        automake \
+        bc \
+        build-essential \
+        ca-certificates \
+        check \
+        cmake \
+        curl \
+        dmidecode \
+        emacs \
+        g++ \
+        gcc \
+        git \
+        iproute2 \
+        jq \
+        kmod \
+        libaio-dev \
+        libcurl4-openssl-dev \
+        libgl1-mesa-glx \
+        libglib2.0-0 \
+        libgomp1 \
+        libibverbs-dev \
+        libnuma-dev \
+        libnuma1 \
+        libomp-dev \
+        libsm6 \
+        libssl-dev \
+        libsubunit-dev \
+        libsubunit0 \
+        libtool \
+        libxext6 \
+        libxrender-dev \
+        make \
+        moreutils \
+        net-tools \
+        ninja-build \
+        openssh-client \
+        openssh-server \
+        openssl \
+        pkg-config \
+        python3-dev \
+        software-properties-common \
+        sudo \
+        unzip \
+        util-linux \
+        vim \
+        wget \
+        zlib1g-dev \
+    && apt-get autoremove \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/
+
+# Setup base Python to bootstrap Mamba
+RUN add-apt-repository --yes ppa:deadsnakes/ppa \
+    && apt-get update --yes --quiet
+RUN DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
+    python3.11 \
+    python3.11-dev \
+    python3.11-distutils \
+    python3.11-lib2to3 \
+    python3.11-gdbm \
+    python3.11-tk \
+    pip
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 999 \
+    && update-alternatives --config python3 \
+    && ln -s /usr/bin/python3 /usr/bin/python
+RUN pip install --upgrade pip
+
+# Setup optimized Mamba environment with required PyTorch dependencies
+RUN wget -O /tmp/Miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Mambaforge-24.3.0-0-Linux-x86_64.sh \
+    && bash /tmp/Miniforge.sh -b -p /Miniforge \
+    && source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba update -y -q -n base -c defaults mamba \
+    && mamba create -y -q -n Code-Eval python=3.11 setuptools=69.5.1 \
+    && mamba activate Code-Eval \
+    && mamba install -y -q -c conda-forge \
+        charset-normalizer \
+        gputil \
+        ipython \
+        numpy \
+        pandas \
+        scikit-learn \
+        wandb \
+    && mamba install -y -q -c intel \
+        "mkl==2023" \
+        "mkl-static==2023" \
+        "mkl-include==2023" \
+    && mamba install -y -q -c pytorch magma-cuda121 \
+    && mamba clean -a -f -y
+
+# Install VLLM precompiled with appropriate CUDA and ensure PyTorch is installed from the same version channel
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && pip install https://github.com/vllm-project/vllm/releases/download/v0.4.0/vllm-0.4.0-cp311-cp311-manylinux1_x86_64.whl \
+        --extra-index-url https://download.pytorch.org/whl/cu121
+
+# Install Flash Attention
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && export MAX_JOBS=$(($(nproc) - 2)) \
+    && pip install --no-cache-dir ninja packaging psutil \
+    && pip install flash-attn==2.5.8 --no-build-isolation
+
+# Acquire benchmark code to local
+RUN git clone https://github.com/bigcode-project/code-eval.git /wildcode
+
+# Install Code-Eval and pre-load the dataset
+RUN source /Miniforge/etc/profile.d/conda.sh \
+    && source /Miniforge/etc/profile.d/mamba.sh \
+    && mamba activate Code-Eval \
+    && pip install wild-code --upgrade \
+    && python -c "from wildcode.data import get_wildcodebench; get_wildcodebench()"
+
+WORKDIR /wildcode
+
+# Declare an argument for the huggingface token
+ARG HF_TOKEN
+RUN if [[ -n "$HF_TOKEN" ]] ; then /Miniforge/envs/Code-Eval/bin/huggingface-cli login --token $HF_TOKEN ; \
+    else echo "No HuggingFace token specified. Access to gated or private models will be unavailable." ; fi
+
+ENTRYPOINT ["/Miniforge/envs/Code-Eval/bin/python", "-m", "wildcode.generate"]
```
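This file differs from the CUDA 11 variant only in the base image (`12.1.1` vs `11.8.0`), the `magma-cuda121` package, the vLLM wheel, and the `cu121` PyTorch index. A quick sanity check that torch and vLLM inside the image agree on the CUDA version might look like the following (the `cu12` tag is hypothetical; `--entrypoint` bypasses the default `wildcode.generate` entrypoint):

```shell
# Hypothetical local tag; prints e.g. "12.1 0.4.0" if the build is consistent
docker run --rm --entrypoint /Miniforge/envs/Code-Eval/bin/python \
    codeeval/code-eval-generate-cu12:latest \
    -c "import torch, vllm; print(torch.version.cuda, vllm.__version__)"
```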

README.md (+29 −5)

````diff
@@ -89,7 +89,7 @@ pip install -U flash-attn
 ```
 
 To generate code samples from a model, you can use the following command:
-
+>
 ```shell
 wildcode.generate \
     --model [model_name] \
@@ -100,11 +100,35 @@ wildcode.generate \
     --temperature [temp] \
     --n_samples [n_samples] \
     --resume \
-    --backend [vllm|hf|openai|mistral|anthropic|google]
+    --backend [vllm|hf|openai|mistral|anthropic|google] \
     --tp [gpu_number]
 ```
-The generated code samples will be stored in a file named `[model_name]--wildcodebench-[nl2c|c2c]--[backend]-[temp]-[n_samples].jsonl`.
-
+>
+The generated code samples will be stored in a file named `[model_name]--wildcodebench-[nl2c|c2c]--[backend]-[temp]-[n_samples].jsonl`. Alternatively, you can use the following command to use our pre-built docker images for generating code samples:
+>
+```shell
+docker run --gpus "\"device=${CUDA_VISIBLE_DEVICES}\"" -v $(pwd):/wildcode -t codeeval/code-eval-generate-cu11:25052024 --model [model_name] \
+    --dataset [wildcodebench] \
+    --nl2code [False|True] \
+    --greedy \
+    --bs [bs] \
+    --temperature [temp] \
+    --n_samples [n_samples] \
+    --resume \
+    --backend [vllm|hf|openai|mistral|anthropic|google] \
+    --tp [gpu_number]
+```
+>
+We make available pre-built docker images for `cuda 11.8.0` and `cuda 12.1.1`, with the corresponding Dockerfiles in the `Docker` directory.
+>
+If you wish to use gated or private HuggingFace models and datasets, you need to build the container yourself with `--build-arg` flags as follows:
+>
+```shell
+docker build --build-arg HF_TOKEN=<YOUR_HF_TOKEN> -t codeeval/code-eval-generate-cu11:latest - < Docker/Generate_Cuda11.Dockerfile
+```
+>
+Following which, you can run the built container as shown above.
+>
 <details><summary>🤔 Structure of `problem`? <i>:: click to expand ::</i></summary>
 <div>
 
@@ -164,7 +188,7 @@ You are strongly recommended to use a sandbox such as [docker](https://docs.dock
 
 ```shell
 # mount the current directory to the container
-docker run -v $(pwd):/wildcode terryzho/wildcode:latest --dataset wildcodebench --samples samples.jsonl
+docker run -v $(pwd):/wildcode codeeval/code-eval-evaluate:latest --dataset wildcodebench --samples samples.jsonl
 # ...Or locally ⚠️
 wildcode.evaluate --dataset wildcodebench --samples samples.jsonl
 ```
````
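Taken together, the new images split the pipeline into a generation container and an evaluation container that share a mounted working directory. A sketch of the full loop under the README's conventions (bracketed placeholders as above; the samples file name follows the documented pattern):

```shell
# 1. Generate samples; the .jsonl output lands in the mounted $PWD
docker run --gpus "\"device=${CUDA_VISIBLE_DEVICES}\"" -v $(pwd):/wildcode \
    codeeval/code-eval-generate-cu11:25052024 \
    --model [model_name] --dataset wildcodebench --greedy --backend vllm --tp 1

# 2. Evaluate the generated samples with the evaluation image
docker run -v $(pwd):/wildcode codeeval/code-eval-evaluate:latest \
    --dataset wildcodebench --samples samples.jsonl
```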
