Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add makefile to build dockers + fix deps for duckling in nlp docker #23

Merged
merged 1 commit into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@
# disallow notebooks at root
./*.ipynb
**/.ipynb_checkpoints/

### Outputs
*.log
18 changes: 18 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Path of this Makefile, exactly as make was asked to read it.
# Every `include` appends to MAKEFILE_LIST, so this must be captured
# before the optional config below is pulled in.
MAKEFILE_NAME := $(lastword $(MAKEFILE_LIST))
# Optional local overrides (for example APP_DOMAINS or DOCKER_REPO).
# The leading `-` makes a missing Makefile.config a silent no-op.
-include Makefile.config

# Application
# Absolute path of the directory that contains this Makefile.
APP_ROOT := $(abspath $(MAKEFILE_NAME)/..)
# Last path component of APP_ROOT, computed with a make built-in so that
# parsing the Makefile does not fork a `basename` process.
APP_NAME := $(notdir $(APP_ROOT))
# Space-separated list of domains to build; each one is used as a
# sub-directory of APP_ROOT (docker build context) and as the image name.
APP_DOMAINS ?= eo nlp
# Repository prefix used when tagging the images.
DOCKER_REPO ?= crim-ca/pavics-jupyter-images

# `set -o pipefail` in the recipe below is a bash feature; plain /bin/sh
# (e.g. dash) may not support it.
SHELL := bash

# One target per domain: docker-build-eo, docker-build-nlp, ...
DOCKER_BUILDS := $(addprefix docker-build-,$(APP_DOMAINS))

# None of these targets produce a file named after themselves.
.PHONY: docker-build $(DOCKER_BUILDS)

# Build the image of a single domain. The stem ($*) is the domain name: it
# selects the build context directory under APP_ROOT and names the image.
# Build output (stdout and stderr) is shown on the terminal and copied to
# a per-target log file, e.g. docker-build-nlp.log.
# NOTE(review): the original log file name was mangled by an e-mail
# obfuscation filter; "$@.log" is a reconstruction - confirm against the
# repository.
# pipefail: without it the exit status of the pipeline is that of `tee`,
# so a failed `docker build` would be reported as success.
$(DOCKER_BUILDS): docker-build-%:
	set -o pipefail; \
	docker build -t "$(DOCKER_REPO)/$*:latest" "$(APP_ROOT)/$*" 2>&1 | tee "$(APP_ROOT)/$@.log"

# Aggregate target: build the images of every domain in APP_DOMAINS.
docker-build: $(DOCKER_BUILDS)
41 changes: 26 additions & 15 deletions nlp/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ FROM birdhouse/pavics-jupyter-base:0.5.0

# must update conda env as root, because of a permission error when having pip dependencies in the input yml file
USER root
# install pre-requirement for compiling some dependencies
# - refresh the package index first: the index inherited from the base image
#   may be stale or absent, in which case the install cannot locate packages
# - use apt-get: the `apt` front-end does not guarantee a stable CLI for scripts
# - drop the downloaded index afterwards so it does not end up in the layer
RUN apt-get update && \
    apt-get install -y pkg-config libpcre++-dev && \
    rm -rf /var/lib/apt/lists/*

COPY environment.yml /environment.yml
COPY notebook_config.yml /notebook_config.yml

# update env "birdy"
# use umask 0000 so that package files for the updated environment are usable by the user for the jupyter-conda-extension
RUN umask 0000 && mamba env update -f /environment.yml \
&& mamba clean -a
RUN umask 0000 && \
mamba env update -f /environment.yml && \
mamba clean -y -a

# Set the encoding to UTF-8, this is needed for heideltime to work properly
ENV LANG=C.UTF-8
Expand All @@ -19,40 +21,49 @@ RUN python -m spacy download en_core_web_trf

# Downloading the ner-large flair model
RUN mkdir flair_models && \
curl -L -o flair_models/ner-large https://huggingface.co/flair/ner-english-large/resolve/main/pytorch_model.bin > /dev/null
curl -L -o flair_models/ner-large \
https://huggingface.co/flair/ner-english-large/resolve/main/pytorch_model.bin > /dev/null

# Heideltime Tree-tagger Installation
RUN mkdir -p heideltime/tree-tagger-linux-3.2.3 && cd heideltime/tree-tagger-linux-3.2.3 && \
curl -o tree-tagger-linux-3.2.3.tar.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tree-tagger-linux-3.2.3.tar.gz && \
curl -o tagger-scripts.tar.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz && \
curl -o english.par.gz https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/english.par.gz && \
curl -o install-tagger.sh https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/install-tagger.sh && \
curl -o tree-tagger-linux-3.2.3.tar.gz \
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tree-tagger-linux-3.2.3.tar.gz && \
curl -o tagger-scripts.tar.gz \
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/tagger-scripts.tar.gz && \
curl -o english.par.gz \
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/english.par.gz && \
curl -o install-tagger.sh \
https://www.cis.lmu.de/~schmid/tools/TreeTagger/data/install-tagger.sh && \
tar -xvzf tree-tagger-linux-3.2.3.tar.gz && \
sh install-tagger.sh && \
rm tree-tagger-linux-3.2.3.tar.gz tagger-scripts.tar.gz english.par.gz install-tagger.sh

# Download and extract heideltime.standalone.jar
RUN cd heideltime && \
curl -L -o heideltime-standalone-2.2.1.tar.gz https://github.com/HeidelTime/heideltime/releases/download/VERSION2.2.1/heideltime-standalone-2.2.1.tar.gz && \
RUN cd heideltime && \
curl -L -o heideltime-standalone-2.2.1.tar.gz \
https://github.com/HeidelTime/heideltime/releases/download/VERSION2.2.1/heideltime-standalone-2.2.1.tar.gz && \
tar -xzvf heideltime-standalone-2.2.1.tar.gz heideltime-standalone/de.unihd.dbs.heideltime.standalone.jar && \
mv heideltime-standalone/de.unihd.dbs.heideltime.standalone.jar . && \
rmdir heideltime-standalone && \
rm heideltime-standalone-2.2.1.tar.gz

# Give read&write permission to jenkins for config
RUN chown -R jenkins heideltime

# Setup Haskell for Duckling server
# https://github.com/facebook/duckling
RUN curl -sSL https://get.haskellstack.org/ | sh && \
RUN curl -sSL https://get.haskellstack.org/ | bash && \
git clone https://github.com/facebook/duckling && \
cd duckling && \
stack build && \
stack install && \
cd .. && \
rm -fr duckling

# Give read&write permission to jenkins for config
RUN chown -R jenkins heideltime
ENV PATH="/root/.local/bin:$PATH"

# Give ownership of the conda cache folder to jenkins, to enable installing packages by the user from JupyterLab
RUN mkdir /opt/conda/pkgs/cache && chown -R 1000:1000 /opt/conda/pkgs/cache

COPY notebook_config.yml /notebook_config.yml

# specify user because of problem running start-notebook.sh when being root
USER jenkins
Loading