From d225ba2ef51b4713b671da746989615de26a26d5 Mon Sep 17 00:00:00 2001
From: Francis Charette Migneault
Date: Tue, 12 Dec 2023 20:58:45 -0500
Subject: [PATCH] add makefile to build dockers + fix deps for duckling in nlp docker

---
Review notes (this section is ignored by `git am`):
 * Makefile: the docker-build-% recipe now runs under bash with `pipefail`, so a
   failed `docker build` is no longer hidden by the exit status of `tee`.
 * Makefile: the generated docker-build-<domain> targets are declared .PHONY, and
   APP_NAME uses $(notdir ...) instead of forking `basename`.
 * The Makefile hunk was edited by hand, so its `index` blob id is stale.
 * Line breaks and indentation of this patch were restored from a flattened copy.
   The whitespace-only change on `RUN cd heideltime && \` could not be recovered;
   confirm against the original commit before applying.
 * NOTE(review): `apt install` in nlp/Dockerfile runs without a preceding
   `apt-get update`; confirm the base image still ships its package lists.

 .gitignore     |  3 +++
 Makefile       | 25 +++++++++++++++++++++++++
 nlp/Dockerfile | 41 ++++++++++++++++++++++++++---------------
 3 files changed, 54 insertions(+), 15 deletions(-)
 create mode 100644 Makefile

diff --git a/.gitignore b/.gitignore
index 5e14e09..2f7b378 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,6 @@
 # disallow notebooks at root
 ./*.ipynb
 **/.ipynb_checkpoints/
+
+### Outputs
+*.log
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e2a243d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,25 @@
+# Included custom configs change the value of MAKEFILE_LIST
+# Extract the required reference beforehand so we can use it for help target
+MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST))
+# Include custom config if it is available
+-include Makefile.config
+
+# Application
+APP_ROOT    := $(abspath $(lastword $(MAKEFILE_NAME))/..)
+# Name of the directory that contains this Makefile
+APP_NAME    := $(notdir $(APP_ROOT))
+# Sub-directories of APP_ROOT to build; each one is used as a docker build context
+APP_DOMAINS ?= eo nlp
+DOCKER_REPO ?= crim-ca/pavics-jupyter-images
+
+# One 'docker-build-<domain>' target per domain: builds '<DOCKER_REPO>/<domain>:latest'
+# and copies the build output to '<APP_ROOT>/make-docker-build-<domain>.log'
+DOCKER_BUILDS := $(addprefix docker-build-, $(APP_DOMAINS))
+$(DOCKER_BUILDS): docker-build-%:
+	set -o pipefail && docker build -t $(DOCKER_REPO)/$*:latest "$(APP_ROOT)/$*" 2>&1 | tee "$(APP_ROOT)/make-$@.log"
+# 'pipefail' needs bash: without it the exit status of 'tee' hides a failed 'docker build'
+$(DOCKER_BUILDS): SHELL := bash
+
+# Build the images of all domains; none of these targets produce a file of their own name
+.PHONY: docker-build $(DOCKER_BUILDS)
+docker-build: $(DOCKER_BUILDS)
diff --git a/nlp/Dockerfile b/nlp/Dockerfile
index 8253707..c0bb53d 100644
--- a/nlp/Dockerfile
+++ b/nlp/Dockerfile
@@ -2,14 +2,16 @@ FROM birdhouse/pavics-jupyter-base:0.5.0
 
 # must update conda env as root, because of a permission error when having pip dependencies in the input yml file
 USER root
+# install pre-requirement for compiling some dependencies
+RUN apt install -y pkg-config libpcre++-dev
 
 COPY environment.yml /environment.yml
-COPY notebook_config.yml /notebook_config.yml
 
 # update env "birdy"
 # use umask 0000 so that package files for the updated environment are usable by the user for the jupyter-conda-extension
-RUN umask 0000 && mamba env update -f /environment.yml \
-    && mamba clean -a
+RUN umask 0000 && \
+    mamba env update -f /environment.yml && \
+    mamba clean -y -a
 
 # Set the encoding to UTF-8, this is needed for heideltime to work properly
 ENV LANG=C.UTF-8
@@ -19,40 +21,49 @@ RUN python -m spacy download en_core_web_trf
 
 # Downloading the ner-large flair model
 RUN mkdir flair_models && \
-    curl -L -o flair_models/ner-large https://huggingface.co/flair/ner-english-large/resolve/main/pytorch_model.bin > /dev/null
+    curl -L -o flair_models/ner-large \
+        https://huggingface.co/flair/ner-english-large/resolve/main/pytorch_model.bin > /dev/null
 
 # Heideltime Tree-tagger Installation
 RUN mkdir -p heideltime/tree-tagger-linux-3.2.3 && cd heideltime/tree-tagger-linux-3.2.3 && \
-    curl -o tree-tagger-linux-3.2.3.tar.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tree-tagger-linux-3.2.3.tar.gz && \
-    curl -o tagger-scripts.tar.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz && \
-    curl -o english.par.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/english.par.gz && \
-    curl -o install-tagger.sh https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/install-tagger.sh && \
+    curl -o tree-tagger-linux-3.2.3.tar.gz \
+        https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tree-tagger-linux-3.2.3.tar.gz && \
+    curl -o tagger-scripts.tar.gz \
+        https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz && \
+    curl -o english.par.gz \
+        https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/english.par.gz && \
+    curl -o install-tagger.sh \
+        https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/install-tagger.sh && \
     tar -xvzf tree-tagger-linux-3.2.3.tar.gz && \
     sh install-tagger.sh && \
     rm tree-tagger-linux-3.2.3.tar.gz tagger-scripts.tar.gz english.par.gz install-tagger.sh
 
 # Download and extract heideltime.standalone.jar
-RUN cd heideltime && \
-    curl -L -o heideltime-standalone-2.2.1.tar.gz https://github.com/HeidelTime/heideltime/releases/download/VERSION2.2.1/heideltime-standalone-2.2.1.tar.gz && \
+RUN cd heideltime && \
+    curl -L -o heideltime-standalone-2.2.1.tar.gz \
+        https://github.com/HeidelTime/heideltime/releases/download/VERSION2.2.1/heideltime-standalone-2.2.1.tar.gz && \
     tar -xzvf heideltime-standalone-2.2.1.tar.gz heideltime-standalone/de.unihd.dbs.heideltime.standalone.jar && \
     mv heideltime-standalone/de.unihd.dbs.heideltime.standalone.jar . && \
     rmdir heideltime-standalone && \
     rm heideltime-standalone-2.2.1.tar.gz
 
+# Give read&write permission to jenkins for config
+RUN chown -R jenkins heideltime
+
 # Setup Haskell for Duckling server
-# https://github.com/facebook/duckling
-RUN curl -sSL https://get.haskellstack.org/ | sh && \
+RUN curl -sSL https://get.haskellstack.org/ | bash && \
     git clone https://github.com/facebook/duckling && \
     cd duckling && \
     stack build && \
     stack install && \
+    cd .. && \
     rm -fr duckling
-
-# Give read&write permission to jenkins for config
-RUN chown -R jenkins heideltime
+ENV PATH="/root/.local/bin:$PATH"
 
 # Give ownership of the conda cache folder to jenkins, to enable installing packages by the user from JupyterLab
 RUN mkdir /opt/conda/pkgs/cache && chown -R 1000:1000 /opt/conda/pkgs/cache
 
+COPY notebook_config.yml /notebook_config.yml
+
 # specify user because of problem running start-notebook.sh when being root
 USER jenkins